diff --git a/frontend/summary/dataPivot/DataPivot.js b/frontend/summary/dataPivot/DataPivot.js index 4e4ddcb5ca..0447fa5e6d 100644 --- a/frontend/summary/dataPivot/DataPivot.js +++ b/frontend/summary/dataPivot/DataPivot.js @@ -44,12 +44,22 @@ class DataPivot { } static get_object(pk, callback) { - const url = `/summary/api/data_pivot/${pk}/`; + const url = `/summary/api/data_pivot/${pk}/`, + handleError = err => { + $("#loading_div").hide(); + handleVisualError(err, $("#dp_display")); + }; fetch(url, h.fetchGet) .then(d => d.json()) .then(d => { fetch(d.data_url, h.fetchGet) + .then(resp => { + if (!resp.ok) { + throw Error(`Invalid server response: ${resp.status}`); + } + return resp; + }) .then(d => d.text()) .then(data => d3.tsvParse(data)) .then(data => { @@ -58,9 +68,9 @@ class DataPivot { callback(dp); } }) - .catch(err => handleVisualError(err, null)); + .catch(handleError); }) - .catch(err => handleVisualError(err, null)); + .catch(handleError); } static displayAsModal(id) { diff --git a/hawc/apps/animal/exports.py b/hawc/apps/animal/exports.py index 4f5beced51..62b9c82a2c 100644 --- a/hawc/apps/animal/exports.py +++ b/hawc/apps/animal/exports.py @@ -1,744 +1,1487 @@ -from copy import copy +import math +from collections import defaultdict + +import numpy as np +import pandas as pd +from django.db.models import CharField, F +from django.db.models.functions import Cast +from django.db.models.lookups import Exact +from scipy import stats from ..assessment.models import DoseUnits -from ..common.helper import FlatFileExporter -from ..materialized.models import FinalRiskOfBiasScore -from ..study.models import Study +from ..bmd.models import Session +from ..common.exports import Exporter, ModelExport +from ..common.helper import FlatFileExporter, cleanHTML +from ..common.models import sql_display, sql_format, str_m2m +from ..materialized.exports import get_final_score_df +from ..study.exports import StudyExport from . 
import constants, models -def get_gen_species_strain_sex(e, withN=False): - gen = e["animal_group"]["generation"] - if len(gen) > 0: - gen += " " - - ns_txt = "" - if withN: - ns = [eg["n"] for eg in e["groups"] if eg["n"] is not None] - if len(ns) > 0: - ns_txt = ", N=" + models.EndpointGroup.getNRangeText(ns) - - sex_symbol = e["animal_group"]["sex_symbol"] - if sex_symbol == "NR": - sex_symbol = "sex=NR" - - return ( - f"{gen}{e['animal_group']['species']}, {e['animal_group']['strain']} ({sex_symbol}{ns_txt})" - ) +def cont_ci(stdev, n, response): + """ + Two-tailed t-test, assuming 95% confidence interval. + """ + se = stdev / math.sqrt(n) + change = stats.t.ppf(0.975, max(n - 1, 1)) * se + lower_ci = response - change + upper_ci = response + change + return lower_ci, upper_ci -def get_treatment_period(exp, dr): - txt = exp["type"].lower() - if txt.find("(") >= 0: - txt = txt[: txt.find("(")] +def dich_ci(incidence, n): + """ + Add confidence intervals to dichotomous datasets. + https://www.epa.gov/sites/production/files/2020-09/documents/bmds_3.2_user_guide.pdf - if dr["duration_exposure_text"]: - txt = f"{txt} ({dr['duration_exposure_text']})" + The error bars shown in BMDS plots use alpha = 0.05 and so + represent the 95% confidence intervals on the observed + proportions (independent of model). 
+ """ + p = incidence / float(n) + z = stats.norm.ppf(1 - 0.05 / 2) + z2 = z * z + q = 1.0 - p + tmp1 = 2 * n * p + z2 + lower_ci = ((tmp1 - 1) - z * np.sqrt(z2 - (2 + 1 / n) + 4 * p * (n * q + 1))) / (2 * (n + z2)) + upper_ci = ((tmp1 + 1) + z * np.sqrt(z2 + (2 + 1 / n) + 4 * p * (n * q - 1))) / (2 * (n + z2)) + return lower_ci, upper_ci + + +def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): + mean = low = high = None + + if mu_1 is not None and mu_2 is not None and mu_1 > 0 and mu_2 > 0: + mean = (mu_2 - mu_1) / mu_1 * 100.0 + if sd_1 and sd_2 and n_1 and n_2: + sd = math.sqrt( + pow(mu_1, -2) + * ((pow(sd_2, 2) / n_2) + (pow(mu_2, 2) * pow(sd_1, 2)) / (n_1 * pow(mu_1, 2))) + ) + ci = (1.96 * sd) * 100 + rng = sorted([mean - ci, mean + ci]) + low = rng[0] + high = rng[1] - return txt + return mean, low, high -def get_significance_and_direction(data_type, groups): +def maximum_percent_control_change(changes: list): """ - Get significance and direction; return all possible values as strings. + For each endpoint, return the maximum absolute-change percent control + for that endpoint, or 0 if it cannot be calculated. Useful for + ordering data-pivot results. """ - significance_list = [] - - if len(groups) == 0: - return significance_list - - if data_type in { - constants.DataType.CONTINUOUS, - constants.DataType.PERCENT_DIFFERENCE, - constants.DataType.DICHOTOMOUS, - constants.DataType.DICHOTOMOUS_CANCER, - }: - if data_type in { - constants.DataType.CONTINUOUS, - constants.DataType.PERCENT_DIFFERENCE, - }: - field = "response" - elif data_type in { - constants.DataType.DICHOTOMOUS, - constants.DataType.DICHOTOMOUS_CANCER, - }: - field = "percent_affected" - else: - raise ValueError(f"Unreachable code? 
data_type={data_type}") - control_resp = groups[0][field] - for group in groups: - if group["significant"]: - resp = group[field] - if control_resp is None or resp is None or resp == control_resp: - significance_list.append("Yes - ?") - elif resp > control_resp: - significance_list.append("Yes - ↑") - else: - significance_list.append("Yes - ↓") - else: - significance_list.append("No") - elif data_type == constants.DataType.NR: - for group in groups: - significance_list.append("?") - else: - raise ValueError("Unreachable code - unable to determine significance/direction") - - return significance_list + val = 0 + + if len(changes) > 0: + min_ = min(changes) + max_ = max(changes) + val = min_ if abs(min_) > abs(max_) else max_ + + return val + + +class ExperimentExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "type_display": "type_display", + "has_multiple_generations": "has_multiple_generations", + "chemical": "chemical", + "cas": "cas", + "dtxsid": "dtxsid", + "chemical_source": "chemical_source", + "purity_available": "purity_available", + "purity_qualifier": "purity_qualifier", + "purity": "purity", + "vehicle": "vehicle", + "guideline_compliance": "guideline_compliance", + "description": "description", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/ani/experiment/{}/", query_prefix + "id"), # hardcoded URL + "type_display": sql_display(query_prefix + "type", constants.ExperimentType), + } + + def prepare_df(self, df): + # clean html text + description = self.get_column_name("description") + if description in df.columns: + df.loc[:, description] = df[description].apply(cleanHTML) + return df + + +class AnimalGroupExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "sex_display": "sex_display", + "sex_symbol": "sex_symbol", + "animal_source": "animal_source", + "lifestage_exposed": "lifestage_exposed", + 
"lifestage_assessed": "lifestage_assessed", + "siblings": "siblings", + "parents_display": "parents_display", + "generation": "generation", + "comments": "comments", + "diet": "diet", + "species_name": "species__name", + "strain_name": "strain__name", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/ani/animal-group/{}/", query_prefix + "id"), # hardcoded URL + "sex_display": sql_display(query_prefix + "sex", constants.Sex), + "sex_symbol": sql_display(query_prefix + "sex", models.AnimalGroup.SEX_SYMBOLS), + "parents_display": str_m2m(Cast(query_prefix + "parents", output_field=CharField())), + } + + def prepare_df(self, df): + # clean html text + comments = self.get_column_name("comments") + if comments in df.columns: + df.loc[:, comments] = df[comments].apply(cleanHTML) + return df + + +class DosingRegimeExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "dosed_animals": "dosed_animals", + "route_of_exposure_display": "route_of_exposure_display", + "duration_exposure": "duration_exposure", + "duration_exposure_text": "duration_exposure_text", + "duration_observation": "duration_observation", + "num_dose_groups": "num_dose_groups", + "positive_control_display": "positive_control_display", + "negative_control_display": "negative_control_display", + "description": "description", + } + + def get_annotation_map(self, query_prefix): + POSITIVE_CONTROL = {k: v for k, v in constants.POSITIVE_CONTROL_CHOICES} + return { + "route_of_exposure_display": sql_display( + query_prefix + "route_of_exposure", constants.RouteExposure + ), + "positive_control_display": sql_display( + query_prefix + "positive_control", POSITIVE_CONTROL + ), + "negative_control_display": sql_display( + query_prefix + "negative_control", constants.NegativeControl + ), + } + + def prepare_df(self, df): + # clean html text + description = self.get_column_name("description") + if description in df.columns: + df.loc[:, description] = 
df[description].apply(cleanHTML) + return df + + +class EndpointExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "effects_display": "effects_display", + "system": "system", + "organ": "organ", + "effect": "effect", + "effect_subtype": "effect_subtype", + "name_term_id": "name_term_id", + "system_term_id": "system_term_id", + "organ_term_id": "organ_term_id", + "effect_term_id": "effect_term_id", + "effect_subtype_term_id": "effect_subtype_term_id", + "litter_effects": "litter_effects", + "litter_effect_notes": "litter_effect_notes", + "observation_time": "observation_time", + "observation_time_units_display": "observation_time_units_display", + "observation_time_text": "observation_time_text", + "data_location": "data_location", + "response_units": "response_units", + "data_type": "data_type", + "data_type_display": "data_type_display", + "variance_type": "variance_type", + "variance_type_name": "variance_type_name", + "confidence_interval": "confidence_interval", + "data_reported": "data_reported", + "data_extracted": "data_extracted", + "values_estimated": "values_estimated", + "expected_adversity_direction": "expected_adversity_direction", + "expected_adversity_direction_display": "expected_adversity_direction_display", + "monotonicity_display": "monotonicity_display", + "statistical_test": "statistical_test", + "trend_value": "trend_value", + "trend_result_display": "trend_result_display", + "diagnostic": "diagnostic", + "power_notes": "power_notes", + "results_notes": "results_notes", + "endpoint_notes": "endpoint_notes", + "additional_fields": "additional_fields", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/ani/endpoint/{}/", query_prefix + "id"), # hardcoded URL + "effects_display": str_m2m(query_prefix + "effects__name"), + "observation_time_units_display": sql_display( + query_prefix + "observation_time_units", constants.ObservationTimeUnits + ), + 
"data_type_display": sql_display(query_prefix + "data_type", constants.DataType), + "variance_type_name": sql_display( + query_prefix + "variance_type", models.Endpoint.VARIANCE_NAME + ), + "expected_adversity_direction_display": sql_display( + query_prefix + "expected_adversity_direction", constants.AdverseDirection + ), + "monotonicity_display": sql_display( + query_prefix + "monotonicity", constants.Monotonicity + ), + "trend_result_display": sql_display( + query_prefix + "trend_result", constants.TrendResult + ), + } + + def prepare_df(self, df): + # clean html text + results_notes = self.get_column_name("results_notes") + if results_notes in df.columns: + df.loc[:, results_notes] = df[results_notes].apply(cleanHTML) + + endpoint_notes = self.get_column_name("endpoint_notes") + if results_notes in df.columns: + df.loc[:, endpoint_notes] = df[endpoint_notes].apply(cleanHTML) + + return df + + +class EndpointGroupExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "dose_group_id": "dose_group_id", + "n": "n", + "incidence": "incidence", + "response": "response", + "variance": "variance", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "significant": "significant", + "significance_level": "significance_level", + "treatment_effect": "treatment_effect", + "treatment_effect_display": "treatment_effect_display", + "NOEL": "NOEL", + "LOEL": "LOEL", + "FEL": "FEL", + } + + def get_annotation_map(self, query_prefix): + return { + "treatment_effect_display": sql_display( + query_prefix + "treatment_effect", constants.TreatmentEffect, default=None + ), + "NOEL": Exact(F(query_prefix + "dose_group_id"), F(query_prefix + "endpoint__NOEL")), + "LOEL": Exact(F(query_prefix + "dose_group_id"), F(query_prefix + "endpoint__LOEL")), + "FEL": Exact(F(query_prefix + "dose_group_id"), F(query_prefix + "endpoint__FEL")), + } + + +class DoseGroupExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "dose_units_id": "dose_units__id", + 
"dose_units_name": "dose_units__name", + "dose_group_id": "dose_group_id", + "dose": "dose", + } + + +class EndpointGroupFlatCompleteExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "animal_group__experiment__study", + ), + ExperimentExport( + "experiment", + "animal_group__experiment", + ), + AnimalGroupExport("animal_group", "animal_group", exclude=("sex_symbol",)), + DosingRegimeExport( + "dosing_regime", + "animal_group__dosing_regime", + ), + EndpointExport( + "endpoint", "", exclude=("expected_adversity_direction", "data_type_display") + ), + EndpointGroupExport("endpoint_group", "groups", exclude=("treatment_effect",)), + DoseGroupExport( + "dose_group", "animal_group__dosing_regime__doses", exclude=("dose_units_id",) + ), + ] class EndpointGroupFlatComplete(FlatFileExporter): - """ - Returns a complete export of all data required to rebuild the the - animal bioassay study type from scratch. - """ - - def _get_header_row(self): - self.doses = DoseUnits.objects.get_animal_units_names(self.kwargs.get("assessment")) - - header = [] - header.extend(Study.flat_complete_header_row()) - header.extend(models.Experiment.flat_complete_header_row()) - header.extend(models.AnimalGroup.flat_complete_header_row()) - header.extend(models.DosingRegime.flat_complete_header_row()) - header.extend(models.Endpoint.flat_complete_header_row()) - header.extend([f"doses-{d}" for d in self.doses]) - header.extend(models.EndpointGroup.flat_complete_header_row()) - return header - - def _get_data_rows(self): - rows = [] - identifiers_df = Study.identifiers_df(self.queryset, "animal_group__experiment__study_id") - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [] - row.extend( - Study.flat_complete_data_row( - ser["animal_group"]["experiment"]["study"], identifiers_df + def handle_doses(self, df: pd.DataFrame) -> pd.DataFrame: + # TODO this is really slow; maybe its the filtering to find matching dose 
group ids? + # solutions: ?, put the burden on SQL w/ Prefetch and Subquery (messy) + # long term solutions: group and dose group should be related + def _func(group_df: pd.DataFrame) -> pd.DataFrame: + # handle case with no dose data + if group_df["dose_group-id"].isna().all(): + return group_df + + # add dose data + group_df["doses-" + group_df["dose_group-dose_units_name"]] = group_df[ + "dose_group-dose" + ].tolist() + + # return a df that is dose agnostic + return group_df.drop_duplicates( + subset=group_df.columns[group_df.columns.str.endswith("-id")].difference( + ["dose_group-id"] ) ) - row.extend(models.Experiment.flat_complete_data_row(ser["animal_group"]["experiment"])) - row.extend(models.AnimalGroup.flat_complete_data_row(ser["animal_group"])) - ser_dosing_regime = ser["animal_group"]["dosing_regime"] - row.extend(models.DosingRegime.flat_complete_data_row(ser_dosing_regime)) - row.extend(models.Endpoint.flat_complete_data_row(ser)) - for i, eg in enumerate(ser["groups"]): - row_copy = copy(row) - ser_doses = ser_dosing_regime["doses"] if ser_dosing_regime else None - row_copy.extend( - models.DoseGroup.flat_complete_data_row(ser_doses, self.doses, i) - if ser_doses - else [None for _ in self.doses] - ) - row_copy.extend(models.EndpointGroup.flat_complete_data_row(eg, ser)) - rows.append(row_copy) - return rows + return ( + df.groupby("endpoint_group-id", group_keys=False, sort=False) + .apply(_func) + .drop( + columns=[ + "dose_group-id", + "dose_group-dose_units_name", + "dose_group-dose_group_id", + "dose_group-dose", + ] + ) + .reset_index(drop=True) + ) + + def handle_stdev(self, df: pd.DataFrame) -> pd.DataFrame: + df["endpoint_group-stdev"] = df.apply( + lambda x: models.EndpointGroup.stdev( + x["endpoint-variance_type"], + x["endpoint_group-variance"], + x["endpoint_group-n"], + ), + axis="columns", + ) + return df.drop(columns=["endpoint-variance_type"]) + + def handle_ci(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(row: pd.Series) -> 
pd.Series: + # logic used from EndpointGroup.getConfidenceIntervals() + data_type = row["endpoint-data_type"] + lower_ci = row["endpoint_group-lower_ci"] + upper_ci = row["endpoint_group-upper_ci"] + n = row["endpoint_group-n"] + + response = row["endpoint_group-response"] + stdev = row["endpoint_group-stdev"] + incidence = row["endpoint_group-incidence"] + if lower_ci is not None or upper_ci is not None or n is None or n <= 0: + pass + elif ( + data_type == constants.DataType.CONTINUOUS + and response is not None + and stdev is not None + ): + ( + row["endpoint_group-lower_ci"], + row["endpoint_group-upper_ci"], + ) = cont_ci(stdev, n, response) + elif ( + data_type in [constants.DataType.DICHOTOMOUS, constants.DataType.DICHOTOMOUS_CANCER] + and incidence is not None + ): + ( + row["endpoint_group-lower_ci"], + row["endpoint_group-upper_ci"], + ) = dich_ci(incidence, n) + return row + + return df.apply(_func, axis="columns").drop(columns=["endpoint_group-stdev"]) + + def build_df(self) -> pd.DataFrame: + df = EndpointGroupFlatCompleteExporter().get_df( + self.queryset.select_related( + "animal_group__experiment__study", + "animal_group__dosing_regime", + ) + .prefetch_related("groups", "animal_group__dosing_regime__doses") + .order_by("id", "groups", "animal_group__dosing_regime__doses") + ) + df = df[ + pd.isna(df["dose_group-id"]) + | (df["endpoint_group-dose_group_id"] == df["dose_group-dose_group_id"]) + ] + if df.empty: + return df + if obj := self.queryset.first(): + doses = DoseUnits.objects.get_animal_units_names(obj.assessment_id) + + df = df.assign(**{f"doses-{d}": None for d in doses}) + df = self.handle_doses(df) + df["dosing_regime-dosed_animals"] = df["dosing_regime-dosed_animals"].astype(str) + df = self.handle_stdev(df) + df = self.handle_ci(df) + + df = df.rename( + columns={ + "endpoint_group-treatment_effect_display": "endpoint_group-treatment_effect", + "endpoint-expected_adversity_direction_display": "endpoint-expected_adversity_direction", + 
"experiment-type_display": "experiment-type", + "animal_group-sex_display": "animal_group-sex", + "dosing_regime-positive_control_display": "dosing_regime-positive_control", + "endpoint-effects_display": "endpoint-effects", + "animal_group-parents_display": "animal_group-parents", + "dosing_regime-route_of_exposure_display": "dosing_regime-route_of_exposure", + "endpoint-monotonicity_display": "endpoint-monotonicity", + "dosing_regime-negative_control_display": "dosing_regime-negative_control", + "endpoint-observation_time_units_display": "endpoint-observation_time_units", + "endpoint-trend_result_display": "endpoint-trend_result", + "endpoint-variance_type_name": "endpoint-variance_type", + "animal_group-species_name": "species-name", + "animal_group-strain_name": "strain-name", + } + ) + + return df + + +class EndpointGroupFlatDataPivotExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "animal_group__experiment__study", + include=("id", "short_citation", "study_identifier", "published"), + ), + ExperimentExport( + "experiment", + "animal_group__experiment", + include=("id", "name", "type_display", "chemical"), + ), + AnimalGroupExport( + "animal_group", + "animal_group", + include=( + "id", + "name", + "lifestage_exposed", + "lifestage_assessed", + "species_name", + "strain_name", + "generation", + "sex_display", + "sex_symbol", + ), + ), + DosingRegimeExport( + "dosing_regime", + "animal_group__dosing_regime", + include=( + "route_of_exposure_display", + "duration_exposure_text", + "duration_exposure", + ), + ), + EndpointExport( + "endpoint", + "", + include=( + "id", + "name", + "system", + "organ", + "effect", + "effect_subtype", + "diagnostic", + "effects_display", + "observation_time", + "observation_time_units_display", + "observation_time_text", + "variance_type", + "data_type", + "data_type_display", + "trend_value", + "trend_result_display", + "expected_adversity_direction", + "response_units", + 
), + ), + EndpointGroupExport( + "endpoint_group", + "groups", + include=( + "id", + "dose_group_id", + "n", + "incidence", + "response", + "lower_ci", + "upper_ci", + "significant", + "significance_level", + "treatment_effect_display", + "NOEL", + "LOEL", + "FEL", + "variance", + ), + ), + DoseGroupExport( + "dose_group", + "animal_group__dosing_regime__doses", + include=("id", "dose_units_id", "dose_units_name", "dose_group_id", "dose"), + ), + ] class EndpointGroupFlatDataPivot(FlatFileExporter): - """ - Return a subset of frequently-used data for generation of data-pivot - visualizations. - """ - - @classmethod - def _get_doses_list(cls, ser, preferred_units): - # compact the dose-list to only one set of dose-units; using the - # preferred units if available, else randomly get first available - units_id = None - + def get_preferred_units(self, df: pd.DataFrame) -> int | None: + preferred_units = self.kwargs.get("preferred_units", None) + available_units = df["dose_group-dose_units_id"].dropna().unique() + if available_units.size == 0: + return None if preferred_units: - available_units = set( - [d["dose_units"]["id"] for d in ser["animal_group"]["dosing_regime"]["doses"]] - ) for units in preferred_units: if units in available_units: - units_id = units - break - - if units_id is None: - units_id = ser["animal_group"]["dosing_regime"]["doses"][0]["dose_units"]["id"] - - return [ - d - for d in ser["animal_group"]["dosing_regime"]["doses"] - if units_id == d["dose_units"]["id"] - ] - - @classmethod - def _get_dose_units(cls, doses: list[dict]) -> str: - return doses[0]["dose_units"]["name"] - - @classmethod - def _get_doses_str(cls, doses: list[dict]) -> str: - if len(doses) == 0: - return "" - values = ", ".join([str(float(d["dose"])) for d in doses]) - return f"{values} {cls._get_dose_units(doses)}" - - @classmethod - def _get_dose(cls, doses: list[dict], idx: int) -> float | None: - for dose in doses: - if dose["dose_group_id"] == idx: - return 
float(dose["dose"]) - return None - - @classmethod - def _get_species_strain(cls, e): - return f"{e['animal_group']['species']} {e['animal_group']['strain']}" - - @classmethod - def _get_observation_time_and_time_units(cls, e): - return f"{e['observation_time']} {e['observation_time_units']}" - - def _get_header_row(self): - # move qs.distinct() call here so we can make qs annotations. - self.queryset = self.queryset.distinct("pk") - if self.queryset.first() is None: - self.rob_headers, self.rob_data = {}, {} - else: - endpoint_ids = set(self.queryset.values_list("id", flat=True)) - self.rob_headers, self.rob_data = FinalRiskOfBiasScore.get_dp_export( - self.queryset.first().assessment_id, - endpoint_ids, - "animal", + return units + return available_units[0] + + def handle_ci(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(row: pd.Series) -> pd.Series: + # logic used from EndpointGroup.getConfidenceIntervals() + data_type = row["endpoint-data_type"] + lower_ci = row["endpoint_group-lower_ci"] + upper_ci = row["endpoint_group-upper_ci"] + n = row["endpoint_group-n"] + + response = row["endpoint_group-response"] + stdev = row["endpoint_group-stdev"] + incidence = row["endpoint_group-incidence"] + if lower_ci is not None or upper_ci is not None or n is None or n <= 0: + pass + elif ( + data_type == constants.DataType.CONTINUOUS + and response is not None + and stdev is not None + ): + ( + row["endpoint_group-lower_ci"], + row["endpoint_group-upper_ci"], + ) = cont_ci(stdev, n, response) + elif ( + data_type in [constants.DataType.DICHOTOMOUS, constants.DataType.DICHOTOMOUS_CANCER] + and incidence is not None + ): + ( + row["endpoint_group-lower_ci"], + row["endpoint_group-upper_ci"], + ) = dich_ci(incidence, n) + return row + + return df.apply(_func, axis="columns") + + def handle_stdev(self, df: pd.DataFrame) -> pd.DataFrame: + df["endpoint_group-stdev"] = df.apply( + lambda x: models.EndpointGroup.stdev( + x["endpoint-variance_type"], + 
x["endpoint_group-variance"], + x["endpoint_group-n"], + ), + axis="columns", + ) + return df + + def handle_percent_control(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(group_df: pd.DataFrame) -> pd.DataFrame: + control = group_df.iloc[0] + + data_type = control["endpoint-data_type"] + i_1 = control["endpoint_group-incidence"] + n_1 = control["endpoint_group-n"] + mu_1 = control["endpoint_group-response"] + sd_1 = control["endpoint_group-stdev"] + + def __func(row: pd.Series) -> pd.Series: + # logic used from EndpointGroup.percentControl() + row["percent control mean"] = None + row["percent control low"] = None + row["percent control high"] = None + if data_type == constants.DataType.CONTINUOUS: + n_2 = row["endpoint_group-n"] + mu_2 = row["endpoint_group-response"] + sd_2 = row["endpoint_group-stdev"] + ( + row["percent control mean"], + row["percent control low"], + row["percent control high"], + ) = percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2) + elif data_type == constants.DataType.PERCENT_DIFFERENCE: + row["percent control mean"] = row["endpoint_group-response"] + row["percent control low"] = row["endpoint_group-lower_ci"] + row["percent control high"] = row["endpoint_group-upper_ci"] + elif data_type == constants.DataType.DICHOTOMOUS: + if i_1 and n_1: + i_2 = row["endpoint_group-incidence"] + n_2 = row["endpoint_group-n"] + if n_2: + row["percent control mean"] = ( + ((i_2 / n_2) - (i_1 / n_1)) / (i_1 / n_1) * 100 + ) + return row + + group_df = group_df.apply(__func, axis="columns") + group_df["maximum endpoint change"] = maximum_percent_control_change( + group_df["percent control mean"].dropna() + ) + return group_df + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def handle_animal_description(self, df: pd.DataFrame): + def _func(group_df: pd.DataFrame) -> pd.Series: + gen = group_df["animal_group-generation"].iloc[0] + if len(gen) > 0: + gen += " " + ns_txt = "" + ns = 
group_df["endpoint_group-n"].dropna().astype(int).tolist() + if len(ns) > 0: + ns_txt = ", N=" + models.EndpointGroup.getNRangeText(ns) + + sex_symbol = group_df["animal_group-sex_symbol"].iloc[0] + if sex_symbol == "NR": + sex_symbol = "sex=NR" + species = group_df["animal_group-species_name"].iloc[0] + strain = group_df["animal_group-strain_name"].iloc[0] + group_df["animal description"] = f"{gen}{species}, {strain} ({sex_symbol})" + group_df["animal description (with N)"] = ( + f"{gen}{species}, {strain} ({sex_symbol}{ns_txt})" ) - noel_names = self.kwargs["assessment"].get_noel_names() - headers = [ - "study id", - "study name", - "study identifier", - "study published", - "experiment id", - "experiment name", - "chemical", - "animal group id", - "animal group name", - "lifestage exposed", - "lifestage assessed", - "species", - "species strain", - "generation", - "animal description", - "animal description (with N)", - "sex", - "route", - "treatment period", - "duration exposure", - "duration exposure (days)", - "endpoint id", - "endpoint name", - "system", - "organ", - "effect", - "effect subtype", - "diagnostic", - "tags", - "observation time", - "observation time text", - "data type", - "doses", - "dose units", - "response units", - "expected adversity direction", - "maximum endpoint change", - "low_dose", - "high_dose", - noel_names.noel, - noel_names.loel, - "FEL", - "trend test value", - "trend test result", - "key", - "dose index", - "dose", - "N", - "incidence", - "response", - "stdev", - "lower_ci", - "upper_ci", - "pairwise significant", - "pairwise significant value", - "treatment related effect", - "percent control mean", - "percent control low", - "percent control high", - "dichotomous summary", - "percent affected", - "percent lower ci", - "percent upper ci", - ] - headers.extend(list(self.rob_headers.values())) - - return headers + return group_df - def _get_data_rows(self): - preferred_units = self.kwargs.get("preferred_units", None) + return ( 
+ df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) - rows = [] - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - doses = self._get_doses_list(ser, preferred_units) - endpoint_robs = [ - self.rob_data[(ser["id"], metric_id)] for metric_id in self.rob_headers.keys() - ] + def handle_treatment_period(self, df: pd.DataFrame) -> pd.DataFrame: + def _calc(row): + txt = row["experiment-type_display"].lower() + if txt.find("(") >= 0: + txt = txt[: txt.find("(")].strip() - # build endpoint-group independent data - row = [ - ser["animal_group"]["experiment"]["study"]["id"], - ser["animal_group"]["experiment"]["study"]["short_citation"], - ser["animal_group"]["experiment"]["study"]["study_identifier"], - ser["animal_group"]["experiment"]["study"]["published"], - ser["animal_group"]["experiment"]["id"], - ser["animal_group"]["experiment"]["name"], - ser["animal_group"]["experiment"]["chemical"], - ser["animal_group"]["id"], - ser["animal_group"]["name"], - ser["animal_group"]["lifestage_exposed"], - ser["animal_group"]["lifestage_assessed"], - ser["animal_group"]["species"], - self._get_species_strain(ser), - ser["animal_group"]["generation"], - get_gen_species_strain_sex(ser, withN=False), - get_gen_species_strain_sex(ser, withN=True), - ser["animal_group"]["sex"], - ser["animal_group"]["dosing_regime"]["route_of_exposure"].lower(), - get_treatment_period( - ser["animal_group"]["experiment"], - ser["animal_group"]["dosing_regime"], - ), - ser["animal_group"]["dosing_regime"]["duration_exposure_text"], - ser["animal_group"]["dosing_regime"]["duration_exposure"], - ser["id"], - ser["name"], - ser["system"], - ser["organ"], - ser["effect"], - ser["effect_subtype"], - ser["diagnostic"], - self.get_flattened_tags(ser, "effects"), - self._get_observation_time_and_time_units(ser), - ser["observation_time_text"], - ser["data_type_label"], - self._get_doses_str(doses), - self._get_dose_units(doses), - 
ser["response_units"], - ser["expected_adversity_direction"], - ser["percentControlMaxChange"], - ] + if row["dosing_regime-duration_exposure_text"]: + txt = f"{txt} ({row['dosing_regime-duration_exposure_text']})" - # dose-group specific information - if len(ser["groups"]) > 1: - row.extend( - [ - self._get_dose(doses, 1), # first non-zero dose - self._get_dose(doses, len(ser["groups"]) - 1), - self._get_dose(doses, ser["NOEL"]), - self._get_dose(doses, ser["LOEL"]), - self._get_dose(doses, ser["FEL"]), - ] - ) - else: - row.extend([None] * 5) - - row.extend([ser["trend_value"], ser["trend_result"]]) - - # endpoint-group information - for i, eg in enumerate(ser["groups"]): - row_copy = copy(row) - row_copy.extend( - [ - eg["id"], - eg["dose_group_id"], - self._get_dose(doses, i), - eg["n"], - eg["incidence"], - eg["response"], - eg["stdev"], - eg["lower_ci"], - eg["upper_ci"], - eg["significant"], - eg["significance_level"], - eg["treatment_effect"], - eg["percentControlMean"], - eg["percentControlLow"], - eg["percentControlHigh"], - eg["dichotomous_summary"], - eg["percent_affected"], - eg["percent_lower_ci"], - eg["percent_upper_ci"], - ] - ) - row_copy.extend(endpoint_robs) - rows.append(row_copy) + return txt - return rows + df["treatment period"] = df.apply(_calc, axis=1, result_type="expand") + return df + def handle_dose_groups(self, df: pd.DataFrame) -> pd.DataFrame: + noel_names = self.kwargs["assessment"].get_noel_names() -class EndpointFlatDataPivot(EndpointGroupFlatDataPivot): - def _get_header_row(self): - if self.queryset.first() is None: - self.rob_headers, self.rob_data = {}, {} - else: - endpoint_ids = set(self.queryset.values_list("id", flat=True)) - self.rob_headers, self.rob_data = FinalRiskOfBiasScore.get_dp_export( - self.queryset.first().assessment_id, - endpoint_ids, - "animal", + def _func(group_df: pd.DataFrame) -> pd.Series: + preferred_units = self.get_preferred_units(group_df) + group_df = group_df[(group_df["dose_group-dose_units_id"] 
== preferred_units)] + reported_doses = group_df["dose_group-dose"].mask( + pd.isna(group_df["endpoint_group-n"]) + & pd.isna(group_df["endpoint_group-response"]) + & pd.isna(group_df["endpoint_group-incidence"]) + ) + doses = ( + group_df["dose_group-dose"] + if reported_doses.dropna().empty + else reported_doses.dropna() + ) + group_df["doses"] = ( + ", ".join(doses.astype(str)) + " " + group_df["dose_group-dose_units_name"] ) - noel_names = self.kwargs["assessment"].get_noel_names() - header = [ - "study id", - "study name", - "study identifier", - "study published", - "experiment id", - "experiment name", - "chemical", - "animal group id", - "animal group name", - "lifestage exposed", - "lifestage assessed", - "species", - "species strain", - "generation", - "animal description", - "animal description (with N)", - "sex", - "route", - "treatment period", - "duration exposure", - "duration exposure (days)", - "endpoint id", - "endpoint name", - "system", - "organ", - "effect", - "effect subtype", - "diagnostic", - "tags", - "observation time", - "observation time text", - "data type", - "doses", - "dose units", - "response units", - "expected adversity direction", - "low_dose", - "high_dose", - noel_names.noel, - noel_names.loel, - "FEL", - "BMD", - "BMDL", - "trend test value", - "trend test result", + if reported_doses.dropna().empty: + group_df["low_dose"] = None + group_df["high_dose"] = None + group_df[noel_names.noel] = None + group_df[noel_names.loel] = None + group_df["FEL"] = None + return group_df + low_dose_index = reported_doses.iloc[1:].first_valid_index() + group_df["low_dose"] = ( + None if low_dose_index is None else reported_doses.loc[low_dose_index] + ) + high_dose_index = reported_doses.iloc[1:].last_valid_index() + group_df["high_dose"] = ( + None if high_dose_index is None else reported_doses.loc[high_dose_index] + ) + NOEL_series = group_df["dose_group-dose"][ + group_df["endpoint_group-NOEL"].fillna(False) & pd.notna(reported_doses) + ] + 
group_df[noel_names.noel] = NOEL_series.iloc[0] if NOEL_series.size > 0 else None + LOEL_series = group_df["dose_group-dose"][ + group_df["endpoint_group-LOEL"].fillna(False) & pd.notna(reported_doses) + ] + group_df[noel_names.loel] = LOEL_series.iloc[0] if LOEL_series.size > 0 else None + FEL_series = group_df["dose_group-dose"][ + group_df["endpoint_group-FEL"].fillna(False) & pd.notna(reported_doses) + ] + group_df["FEL"] = FEL_series.iloc[0] if FEL_series.size > 0 else None + return group_df + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def handle_incidence_summary(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(group_df: pd.DataFrame) -> pd.Series: + group_df["dichotomous summary"] = "-" + group_df["percent affected"] = None + group_df["percent lower ci"] = None + group_df["percent upper ci"] = None + data_type = group_df["endpoint-data_type"].iloc[0] + + def __func(row: pd.Series) -> pd.Series: + # logic used from EndpointGroup.get_incidence_summary() + n = row["endpoint_group-n"] + i = row["endpoint_group-incidence"] + if ( + data_type + in [constants.DataType.DICHOTOMOUS, constants.DataType.DICHOTOMOUS_CANCER] + and n is not None + and n > 0 + and i is not None + ): + row["dichotomous summary"] = f"{int(i)}/{int(n)} ({i / n * 100:.1f}%)" + row["percent affected"] = i / n * 100 + row["percent lower ci"] = row["endpoint_group-lower_ci"] * 100 + row["percent upper ci"] = row["endpoint_group-upper_ci"] * 100 + return row + + return group_df.apply(__func, axis="columns") + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def build_df(self) -> pd.DataFrame: + df = EndpointGroupFlatDataPivotExporter().get_df( + self.queryset.select_related( + "animal_group__experiment__study", + "animal_group__dosing_regime", + ) + .prefetch_related("groups", "animal_group__dosing_regime__doses") + .order_by("id", "groups", 
"animal_group__dosing_regime__doses") + ) + df = df[ + pd.isna(df["dose_group-id"]) + | (df["endpoint_group-dose_group_id"] == df["dose_group-dose_group_id"]) ] + if df.empty: + return df + if obj := self.queryset.first(): + endpoint_ids = list(df["endpoint-id"].unique()) + rob_df = get_final_score_df(obj.assessment_id, endpoint_ids, "animal") + df = df.join(rob_df, on="endpoint-id") + + df["route"] = df["dosing_regime-route_of_exposure_display"].str.lower() + df["species strain"] = ( + df["animal_group-species_name"] + " " + df["animal_group-strain_name"] + ) + + df["observation time"] = ( + df["endpoint-observation_time"].replace(np.nan, None).astype(str) + + " " + + df["endpoint-observation_time_units_display"] + ) + + df = self.handle_stdev(df) + df = self.handle_ci(df) + df = self.handle_dose_groups(df) + df = self.handle_animal_description(df) + df = self.handle_treatment_period(df) + df = self.handle_percent_control(df) + df = self.handle_incidence_summary(df) + + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": "study name", + "study-study_identifier": "study identifier", + "study-published": "study published", + "experiment-id": "experiment id", + "experiment-name": "experiment name", + "experiment-chemical": "chemical", + "animal_group-id": "animal group id", + "animal_group-name": "animal group name", + "animal_group-lifestage_exposed": "lifestage exposed", + "animal_group-lifestage_assessed": "lifestage assessed", + "animal_group-species_name": "species", + "animal_group-generation": "generation", + "animal_group-sex_display": "sex", + "dosing_regime-duration_exposure_text": "duration exposure", + "dosing_regime-duration_exposure": "duration exposure (days)", + "endpoint-id": "endpoint id", + "endpoint-name": "endpoint name", + "endpoint-system": "system", + "endpoint-organ": "organ", + "endpoint-effect": "effect", + "endpoint-effect_subtype": "effect subtype", + "endpoint-diagnostic": "diagnostic", + 
"endpoint-effects_display": "tags", + "endpoint-observation_time_text": "observation time text", + "endpoint-data_type_display": "data type", + "dose_group-dose_units_name": "dose units", + "endpoint-response_units": "response units", + "endpoint-expected_adversity_direction": "expected adversity direction", + "endpoint-trend_value": "trend test value", + "endpoint-trend_result_display": "trend test result", + "endpoint_group-id": "key", + "endpoint_group-dose_group_id": "dose index", + "dose_group-dose": "dose", + "endpoint_group-n": "N", + "endpoint_group-incidence": "incidence", + "endpoint_group-response": "response", + "endpoint_group-stdev": "stdev", + "endpoint_group-lower_ci": "lower_ci", + "endpoint_group-upper_ci": "upper_ci", + "endpoint_group-significant": "pairwise significant", + "endpoint_group-significance_level": "pairwise significant value", + "endpoint_group-treatment_effect_display": "treatment related effect", + } + ) + df = df.drop( + columns=[ + "endpoint-observation_time", + "dose_group-id", + "dose_group-dose_group_id", + "experiment-type_display", + "endpoint_group-FEL", + "animal_group-sex_symbol", + "animal_group-strain_name", + "endpoint_group-LOEL", + "endpoint-variance_type", + "dose_group-dose_units_id", + "endpoint_group-NOEL", + "endpoint-observation_time_units_display", + "endpoint_group-variance", + "endpoint-data_type", + "dosing_regime-route_of_exposure_display", + ] + ) - num_doses = self.queryset.model.max_dose_count(self.queryset) - rng = range(1, num_doses + 1) - header.extend([f"Dose {i}" for i in rng]) - header.extend([f"Significant {i}" for i in rng]) - header.extend([f"Treatment Related Effect {i}" for i in rng]) - header.extend(list(self.rob_headers.values())) - - # distinct applied last so that queryset can add annotations above - # in self.queryset.model.max_dose_count - self.queryset = self.queryset.distinct("pk") - self.num_doses = num_doses - - return header - - @staticmethod - def _get_bmd_values(bmds, 
preferred_units): - # only return BMD values if they're in the preferred units - for bmd in bmds: - # return first match - if bmd["dose_units_id"] in preferred_units and bmd["model"] is not None: - return [bmd["bmd"], bmd["bmdl"]] - return [None, None] - - @staticmethod - def _dose_is_reported(dose_group_id: int, groups: list[dict]) -> bool: - """ - Check if any numerical data( n, response, or incidence) was entered for a dose-group - """ - for group in groups: - if group["dose_group_id"] == dose_group_id: - return any(group.get(key) is not None for key in ["n", "response", "incidence"]) - return False - - @staticmethod - def _dose_low_high(dose_list: list[float | None]) -> tuple[float | None, float | None]: - """ - Finds the lowest and highest non-zero dose from a given list of doses, - ignoring None values. If there are no valid doses, returns None for both - lowest and highest dose. - - Args: - dose_list (list[Optional[float]]): List of doses - - Returns: - tuple[Optional[float], Optional[float]]: Lowest dose and highest dose, - in that order. 
- """ - try: - # map dose list to whether there is recorded data (valid) - dose_validity_list = list(map(lambda d: d is not None, dose_list)) - # first valid dose - low_index = dose_validity_list[1:].index(True) + 1 - # last valid dose - high_index = len(dose_list) - 1 - dose_validity_list[1:][::-1].index(True) - return (dose_list[low_index], dose_list[high_index]) - except ValueError: - return (None, None) - - def _get_data_rows(self): - preferred_units = self.kwargs.get("preferred_units", None) + return df - rows = [] - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - doses = self._get_doses_list(ser, preferred_units) - # filter dose groups by those with recorded data - filtered_doses = list( - filter(lambda d: self._dose_is_reported(d["dose_group_id"], ser["groups"]), doses) - ) - # special case - if no data was reported for any dose-group show all doses; - # it may be the case that data wasn't extracted - if len(filtered_doses) == 0: - filtered_doses = doses - - # build endpoint-group independent data - row = [ - ser["animal_group"]["experiment"]["study"]["id"], - ser["animal_group"]["experiment"]["study"]["short_citation"], - ser["animal_group"]["experiment"]["study"]["study_identifier"], - ser["animal_group"]["experiment"]["study"]["published"], - ser["animal_group"]["experiment"]["id"], - ser["animal_group"]["experiment"]["name"], - ser["animal_group"]["experiment"]["chemical"], - ser["animal_group"]["id"], - ser["animal_group"]["name"], - ser["animal_group"]["lifestage_exposed"], - ser["animal_group"]["lifestage_assessed"], - ser["animal_group"]["species"], - self._get_species_strain(ser), - ser["animal_group"]["generation"], - get_gen_species_strain_sex(ser, withN=False), - get_gen_species_strain_sex(ser, withN=True), - ser["animal_group"]["sex"], - ser["animal_group"]["dosing_regime"]["route_of_exposure"].lower(), - get_treatment_period( - ser["animal_group"]["experiment"], - ser["animal_group"]["dosing_regime"], +class 
EndpointFlatDataPivotExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "animal_group__experiment__study", + include=("id", "short_citation", "study_identifier", "published"), + ), + ExperimentExport( + "experiment", + "animal_group__experiment", + include=("id", "name", "type_display", "chemical"), + ), + AnimalGroupExport( + "animal_group", + "animal_group", + include=( + "id", + "name", + "lifestage_exposed", + "lifestage_assessed", + "species_name", + "strain_name", + "generation", + "sex_display", + "sex_symbol", ), - ser["animal_group"]["dosing_regime"]["duration_exposure_text"], - ser["animal_group"]["dosing_regime"]["duration_exposure"], - ser["id"], - ser["name"], - ser["system"], - ser["organ"], - ser["effect"], - ser["effect_subtype"], - ser["diagnostic"], - self.get_flattened_tags(ser, "effects"), - self._get_observation_time_and_time_units(ser), - ser["observation_time_text"], - ser["data_type_label"], - self._get_doses_str(filtered_doses), - self._get_dose_units(doses), - ser["response_units"], - ser["expected_adversity_direction"], - ] - - # if groups exist, pull all available. Otherwise, start with an empty list. This - # is preferred than just pulling in edge cases where an endpoint has no data - # extracted but has more dose-groups at the animal group level than are avaiable - # for the entire data export. For example, an endpoint may have no data extracted - # and dose-groups, but the entire export may only have data with 4 dose-groups. 
- dose_list = ( - [ - self._get_dose(doses, i) if self._dose_is_reported(i, ser["groups"]) else None - for i in range(len(doses)) - ] - if ser["groups"] - else [] - ) + ), + DosingRegimeExport( + "dosing_regime", + "animal_group__dosing_regime", + include=( + "route_of_exposure_display", + "duration_exposure_text", + "duration_exposure", + ), + ), + EndpointExport( + "endpoint", + "", + include=( + "id", + "name", + "system", + "organ", + "effect", + "effect_subtype", + "diagnostic", + "effects_display", + "observation_time", + "observation_time_units_display", + "observation_time_text", + "variance_type", + "data_type", + "data_type_display", + "trend_value", + "trend_result_display", + "expected_adversity_direction", + "response_units", + ), + ), + EndpointGroupExport( + "endpoint_group", + "groups", + include=( + "id", + "dose_group_id", + "n", + "incidence", + "response", + "lower_ci", + "upper_ci", + "significant", + "significance_level", + "treatment_effect_display", + "NOEL", + "LOEL", + "FEL", + "variance", + ), + ), + DoseGroupExport( + "dose_group", + "animal_group__dosing_regime__doses", + include=("id", "dose_units_id", "dose_units_name", "dose_group_id", "dose"), + ), + ] - # dose-group specific information - row.extend(self._dose_low_high(dose_list)) - try: - row.append(dose_list[ser["NOEL"]]) - except IndexError: - row.append(None) - try: - row.append(dose_list[ser["LOEL"]]) - except IndexError: - row.append(None) - try: - row.append(dose_list[ser["FEL"]]) - except IndexError: - row.append(None) - dose_list.extend([None] * (self.num_doses - len(dose_list))) +class EndpointFlatDataPivot(EndpointGroupFlatDataPivot): + def handle_bmd(self, df: pd.DataFrame) -> pd.DataFrame: + endpoint_ids = df["endpoint-id"].unique() + sessions = Session.objects.filter(endpoint_id__in=endpoint_ids, active=True) + bmd_map = defaultdict(list) + for session in sessions: + bmd_map[session.endpoint_id].append(session.get_selected_model()) + preferred_units = 
self.kwargs.get("preferred_units", None) + df["BMD"] = None + df["BMDL"] = None + + def _func(row: pd.Series) -> pd.Series: + bmds = bmd_map[row["endpoint-id"]] + for bmd in bmds: + if bmd["dose_units_id"] in preferred_units and bmd["model"] is not None: + row["BMD"] = bmd["bmd"] + row["BMDL"] = bmd["bmdl"] + break + return row - # bmd/bmdl information - row.extend(self._get_bmd_values(ser["bmds"], preferred_units)) + return df.apply(_func, axis="columns") - row.extend([ser["trend_value"], ser["trend_result"]]) + def handle_flat_doses(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(group_df: pd.DataFrame) -> pd.Series: + unique_df = group_df.drop_duplicates(subset="endpoint_group-id").reset_index(drop=True) + reported_doses = unique_df["dose_group-dose"].mask( + pd.isna(unique_df["endpoint_group-n"]) + & pd.isna(unique_df["endpoint_group-response"]) + & pd.isna(unique_df["endpoint_group-incidence"]) + ) + num_doses = reported_doses.size - row.extend(dose_list) + group_df[[f"Dose {i}" for i in range(1, num_doses + 1)]] = reported_doses.reset_index( + drop=True + ) - sigs = get_significance_and_direction(ser["data_type"], ser["groups"]) - sigs.extend([None] * (self.num_doses - len(sigs))) - row.extend(sigs) + data_type = unique_df["endpoint-data_type"].iloc[0] + control_group = unique_df.iloc[0] + if pd.isna(unique_df["endpoint_group-id"]).all(): + pass + elif data_type in { + constants.DataType.CONTINUOUS, + constants.DataType.PERCENT_DIFFERENCE, + constants.DataType.DICHOTOMOUS, + constants.DataType.DICHOTOMOUS_CANCER, + }: + if data_type in { + constants.DataType.CONTINUOUS, + constants.DataType.PERCENT_DIFFERENCE, + }: + field = "endpoint_group-response" + elif data_type in { + constants.DataType.DICHOTOMOUS, + constants.DataType.DICHOTOMOUS_CANCER, + }: + field = "percent affected" + control_resp = control_group[field] + insignificant = pd.Series(["No"] * num_doses) + significant = pd.Series(["Yes - ?"] * num_doses) + significant_up = pd.Series(["Yes - ↑"] 
* num_doses) + significant_down = pd.Series(["Yes - ↓"] * num_doses) + + significance = insignificant.mask( + (unique_df["endpoint_group-significant"].fillna(False)), + significant_down.mask((unique_df[field] > control_resp), significant_up).mask( + ( + (pd.isna(control_resp)) + | (pd.isna(unique_df[field])) + | (unique_df[field] == control_resp) + ), + significant, + ), + ) + group_df[[f"Significant {i}" for i in range(1, num_doses + 1)]] = significance + elif data_type == constants.DataType.NR: + group_df[[f"Significant {i}" for i in range(1, num_doses + 1)]] = pd.Series( + ["?"] * num_doses + ) - tres = [dose["treatment_effect"] for dose in ser["groups"]] - tres.extend([None] * (self.num_doses - len(tres))) - row.extend(tres) + group_df[[f"Treatment Related Effect {i}" for i in range(1, num_doses + 1)]] = ( + unique_df["endpoint_group-treatment_effect_display"].reset_index(drop=True) + ) - row.extend( - [self.rob_data[(ser["id"], metric_id)] for metric_id in self.rob_headers.keys()] + return group_df.drop_duplicates( + subset=group_df.columns[group_df.columns.str.endswith("-id")].difference( + ["endpoint_group-id"] + ) ) - rows.append(row) + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def build_df(self) -> pd.DataFrame: + df = EndpointFlatDataPivotExporter().get_df( + self.queryset.select_related( + "animal_group__experiment__study", + "animal_group__dosing_regime", + ) + .prefetch_related("groups", "animal_group__dosing_regime__doses") + .order_by("id", "groups", "animal_group__dosing_regime__doses") + ) + df = df[ + pd.isna(df["endpoint_group-id"]) + | pd.isna(df["dose_group-id"]) + | (df["endpoint_group-dose_group_id"] == df["dose_group-dose_group_id"]) + ] + if df.empty: + return df + if obj := self.queryset.first(): + endpoint_ids = list(df["endpoint-id"].unique()) + rob_df = get_final_score_df(obj.assessment_id, endpoint_ids, "animal") + df = df.join(rob_df, on="endpoint-id") + + 
df["route"] = df["dosing_regime-route_of_exposure_display"].str.lower() + df["species strain"] = ( + df["animal_group-species_name"] + " " + df["animal_group-strain_name"] + ) + + df["observation time"] = ( + df["endpoint-observation_time"].replace(np.nan, None).astype(str) + + " " + + df["endpoint-observation_time_units_display"] + ) + + df = self.handle_stdev(df) + df = self.handle_ci(df) + df = self.handle_incidence_summary(df) + df = self.handle_dose_groups(df) + df = self.handle_flat_doses(df) + df = self.handle_animal_description(df) + df = self.handle_treatment_period(df) + df = self.handle_bmd(df) + + df = df.drop_duplicates(subset="endpoint-id") + + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": "study name", + "study-study_identifier": "study identifier", + "study-published": "study published", + "experiment-id": "experiment id", + "experiment-name": "experiment name", + "experiment-chemical": "chemical", + "animal_group-id": "animal group id", + "animal_group-name": "animal group name", + "animal_group-lifestage_exposed": "lifestage exposed", + "animal_group-lifestage_assessed": "lifestage assessed", + "animal_group-species_name": "species", + "animal_group-generation": "generation", + "animal_group-sex_display": "sex", + "dosing_regime-duration_exposure_text": "duration exposure", + "dosing_regime-duration_exposure": "duration exposure (days)", + "endpoint-id": "endpoint id", + "endpoint-name": "endpoint name", + "endpoint-system": "system", + "endpoint-organ": "organ", + "endpoint-effect": "effect", + "endpoint-effect_subtype": "effect subtype", + "endpoint-diagnostic": "diagnostic", + "endpoint-effects_display": "tags", + "endpoint-observation_time_text": "observation time text", + "endpoint-data_type_display": "data type", + "dose_group-dose_units_name": "dose units", + "endpoint-response_units": "response units", + "endpoint-expected_adversity_direction": "expected adversity direction", + "endpoint-trend_value": 
"trend test value", + "endpoint-trend_result_display": "trend test result", + } + ) + df = df.drop( + columns=[ + "endpoint_group-stdev", + "percent lower ci", + "percent affected", + "percent upper ci", + "dichotomous summary", + "endpoint-variance_type", + "dose_group-dose", + "endpoint-data_type", + "endpoint_group-NOEL", + "endpoint_group-incidence", + "endpoint_group-FEL", + "endpoint_group-treatment_effect_display", + "dose_group-dose_units_id", + "endpoint_group-LOEL", + "dose_group-dose_group_id", + "endpoint_group-variance", + "animal_group-sex_symbol", + "endpoint_group-upper_ci", + "endpoint_group-significance_level", + "endpoint_group-response", + "endpoint_group-lower_ci", + "endpoint_group-significant", + "dose_group-id", + "experiment-type_display", + "endpoint_group-dose_group_id", + "endpoint-observation_time_units_display", + "endpoint_group-n", + "dosing_regime-route_of_exposure_display", + "endpoint_group-id", + "animal_group-strain_name", + "endpoint-observation_time", + ] + ) - return rows + return df -class EndpointSummary(FlatFileExporter): - def _get_header_row(self): +class EndpointSummaryExporter(Exporter): + def build_modules(self) -> list[ModelExport]: return [ - "study-short_citation", - "study-study_identifier", - "experiment-chemical", - "animal_group-name", - "animal_group-sex", - "animal description (with n)", - "dosing_regime-route_of_exposure", - "dosing_regime-duration_exposure_text", - "species-name", - "strain-name", - "endpoint-id", - "endpoint-url", - "endpoint-system", - "endpoint-organ", - "endpoint-effect", - "endpoint-name", - "endpoint-observation_time", - "endpoint-response_units", - "Dose units", - "Doses", - "N", - "Responses", - "Doses and responses", - "Response direction", + StudyExport( + "study", + "animal_group__experiment__study", + include=("short_citation", "study_identifier"), + ), + ExperimentExport( + "experiment", "animal_group__experiment", include=("chemical", "type_display") + ), + AnimalGroupExport( 
+ "animal_group", + "animal_group", + include=( + "name", + "species_name", + "strain_name", + "generation", + "sex_display", + "sex_symbol", + ), + ), + DosingRegimeExport( + "dosing_regime", + "animal_group__dosing_regime", + include=("route_of_exposure_display", "duration_exposure_text"), + ), + EndpointExport( + "endpoint", + "", + include=( + "id", + "url", + "name", + "system", + "organ", + "effect", + "observation_time_text", + "response_units", + "data_type", + ), + ), + EndpointGroupExport( + "endpoint_group", + "groups", + include=( + "id", + "dose_group_id", + "n", + "incidence", + "response", + "variance", + "significant", + ), + ), + DoseGroupExport( + "dose_group", + "animal_group__dosing_regime__doses", + include=("dose_units_name", "dose_group_id", "dose"), + ), ] - def _get_data_rows(self): - def getDoseUnits(doses): - return set(sorted([d["dose_units"]["name"] for d in doses])) - - def getDoses(doses, unit): - doses = [d["dose"] for d in doses if d["dose_units"]["name"] == unit] - return [f"{d:g}" for d in doses] - - def getNs(groups): - return [f"{grp['n'] if grp['n'] is not None else ''}" for grp in groups] - - def getResponses(groups): - resps = [] - for grp in groups: - txt = "" - if grp["isReported"]: - if grp["response"] is not None: - txt = f"{grp['response']:g}" - else: - txt = f"{grp['incidence']:g}" - if grp["variance"] is not None: - txt = f"{txt} ± {grp['variance']:g}" - resps.append(txt) - return resps - - def getDR(doses, responses, units): - txts = [] - for i in range(len(doses)): - if len(responses) > i and len(responses[i]) > 0: - txt = f"{doses[i]} {units}: {responses[i]}" - txts.append(txt) - return ", ".join(txts) - - def getResponseDirection(responses, data_type): - # return unknown if control response is null - if responses and responses[0]["response"] is None: - return "?" 
- - txt = "↔" - for resp in responses: - if resp["significant"]: - if data_type in ["C", "P"]: - if resp["response"] > responses[0]["response"]: - txt = "↑" - else: - txt = "↓" - else: - txt = "↑" - break - return txt - rows = [] - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - - doses = ser["animal_group"]["dosing_regime"]["doses"] - units = getDoseUnits(doses) - - # build endpoint-group independent data - row = [ - ser["animal_group"]["experiment"]["study"]["short_citation"], - ser["animal_group"]["experiment"]["study"]["study_identifier"], - ser["animal_group"]["experiment"]["chemical"], - ser["animal_group"]["name"], - ser["animal_group"]["sex"], - get_gen_species_strain_sex(ser, withN=True), - ser["animal_group"]["dosing_regime"]["route_of_exposure"], - get_treatment_period( - ser["animal_group"]["experiment"], - ser["animal_group"]["dosing_regime"], - ), - ser["animal_group"]["species"], - ser["animal_group"]["strain"], - ser["id"], - ser["url"], - ser["system"], - ser["organ"], - ser["effect"], - ser["name"], - ser["observation_time_text"], - ser["response_units"], - ] +class EndpointSummary(EndpointGroupFlatDataPivot): + def _set_responses(self, df: pd.DataFrame): + df["responses"] = None - responses_list = getResponses(ser["groups"]) - ns_list = getNs(ser["groups"]) - response_direction = getResponseDirection(ser["groups"], ser["data_type"]) - for unit in units: - row_copy = copy(row) - doses_list = getDoses(doses, unit) - row_copy.extend( - [ - unit, # 'units' - ", ".join(doses_list), # Doses - ", ".join(ns_list), # Ns - ", ".join(responses_list), # Responses w/ units - getDR(doses_list, responses_list, unit), - response_direction, - ] - ) - rows.append(row_copy) + def _func(group_df: pd.DataFrame) -> pd.Series: + unique_df = group_df.drop_duplicates(subset="endpoint_group-id") + response_series = ( + unique_df["endpoint_group-response"] + .map("{:g}".format, na_action="ignore") + .fillna("") + ) + incidence_series = ( + 
unique_df["endpoint_group-incidence"] + .map("{:g}".format, na_action="ignore") + .fillna("") + ) + variance_series = ( + unique_df["endpoint_group-variance"] + .map("{:g}".format, na_action="ignore") + .fillna("") + ) + response_or_incidence = incidence_series.mask( + response_series.str.len() > 0, response_series + ) + response_or_incidence_with_variance = response_or_incidence.mask( + (response_or_incidence.str.len() > 0) & (variance_series.str.len() > 0), + response_or_incidence + " ± " + variance_series, + ) + group_df["responses"] = [ + response_or_incidence_with_variance.reset_index(drop=True) + ] * group_df.shape[0] + + return group_df + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def _set_ns(self, df: pd.DataFrame): + df["ns"] = None + + def _func(group_df: pd.DataFrame) -> pd.Series: + unique_df = group_df.drop_duplicates(subset="endpoint_group-id") + group_df["ns"] = [ + unique_df["endpoint_group-n"] + .map("{:g}".format, na_action="ignore") + .fillna("") + .reset_index(drop=True) + ] * group_df.shape[0] + + return group_df + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def _set_response_direction(self, df: pd.DataFrame): + df["response_direction"] = None + + def _func(group_df: pd.DataFrame) -> pd.Series: + data_type = group_df["endpoint-data_type"].iloc[0] + control_group = group_df.iloc[0] + if pd.notna(control_group["endpoint_group-id"]) and pd.isna( + control_group["endpoint_group-response"] + ): + group_df["response_direction"] = "?" 
+ return group_df + significant_groups = group_df[group_df["endpoint_group-significant"].fillna(False)] + if significant_groups.empty: + group_df["response_direction"] = "↔" + return group_df + significant_group = significant_groups.iloc[0] + if data_type in [constants.DataType.CONTINUOUS, constants.DataType.PERCENT_DIFFERENCE]: + if ( + significant_group["endpoint_group-response"] + > control_group["endpoint_group-response"] + ): + group_df["response_direction"] = "↑" + else: + group_df["response_direction"] = "↓" + return group_df + else: + group_df["response_direction"] = "↑" + return group_df + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def _set_doses(self, df: pd.DataFrame): + df["doses"] = None + + def _func(group_df: pd.DataFrame) -> pd.Series: + def __func(_group_df: pd.DataFrame) -> pd.Series: + _group_df["doses"] = [ + _group_df["dose_group-dose"] + .map("{:g}".format, na_action="ignore") + .fillna("") + .reset_index(drop=True) + ] * _group_df.shape[0] + return _group_df + + return ( + group_df.groupby("dose_group-dose_units_name", group_keys=False, sort=False) + .apply(__func) + .reset_index(drop=True) + ) + + return ( + df.groupby("endpoint-id", group_keys=False, sort=False) + .apply(_func) + .reset_index(drop=True) + ) + + def handle_other(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(series: pd.Series): + units = series["dose_group-dose_units_name"] + doses, responses = series["doses"], series["responses"] + doses, responses = ( + doses.iloc[: min(doses.size, responses.size)], + responses.iloc[: min(doses.size, responses.size)], + ) + valid = responses.str.len() > 0 + return ", ".join(doses[valid] + " " + units + ": " + responses[valid]) + + df = self._set_responses(df) + df = self._set_ns(df) + df = self._set_response_direction(df) + df = self._set_doses(df) + + df["Dose units"] = df["dose_group-dose_units_name"] + df["Doses"] = df["doses"].str.join(", ") + df["N"] = 
df["ns"].str.join(", ") + df["Responses"] = df["responses"].str.join(", ") + df["Doses and responses"] = df.apply(_func, axis="columns", result_type="reduce") + df["Response direction"] = df["response_direction"] + + return df + + def handle_treatment_period(self, df: pd.DataFrame): + txt = df["experiment-type_display"].str.lower() + txt_index = txt.str.find("(") + txt_updated = ( + txt.to_frame(name="txt") + .join(txt_index.to_frame(name="txt_index")) + .apply( + lambda x: x["txt"] if x["txt_index"] < 0 else x["txt"][: x["txt_index"]], + axis="columns", + result_type="reduce", + ) + ).astype(str) + df["dosing_regime-duration_exposure_text"] = ( + txt_updated + " (" + df["dosing_regime-duration_exposure_text"] + ).where(df["dosing_regime-duration_exposure_text"].str.len() > 0) + ")" + return df + + def build_df(self) -> pd.DataFrame: + df = EndpointSummaryExporter().get_df( + self.queryset.select_related( + "animal_group__experiment__study", + "animal_group__dosing_regime", + ) + .prefetch_related("groups", "animal_group__dosing_regime__doses") + .order_by("id", "groups", "animal_group__dosing_regime__doses") + ) + + df = df[ + (pd.isna(df["endpoint_group-id"])) + | (df["endpoint_group-dose_group_id"] == df["dose_group-dose_group_id"]) + ] + if df.empty: + return df + df = self.handle_animal_description(df) + df = self.handle_treatment_period(df) + df = self.handle_other(df) + + df = df.drop_duplicates(subset=["endpoint-id", "dose_group-dose_units_name"]) + df = df.sort_values(by=["endpoint-id", "dose_group-dose_units_name"]) + + df = df.rename( + columns={ + "animal description (with N)": "animal description (with n)", + "animal_group-sex_display": "animal_group-sex", + "dosing_regime-route_of_exposure_display": "dosing_regime-route_of_exposure", + "animal_group-species_name": "species-name", + "animal_group-strain_name": "strain-name", + "endpoint-observation_time_text": "endpoint-observation_time", + } + ) + + df = df.drop( + columns=[ + "doses", + 
"experiment-type_display", + "animal description", + "endpoint_group-incidence", + "animal_group-sex_symbol", + "dose_group-dose_units_name", + "dose_group-dose_group_id", + "endpoint-data_type", + "response_direction", + "responses", + "endpoint_group-significant", + "endpoint_group-n", + "dose_group-dose", + "endpoint_group-response", + "endpoint_group-id", + "animal_group-generation", + "endpoint_group-dose_group_id", + "ns", + "endpoint_group-variance", + ] + ) - return rows + return df diff --git a/hawc/apps/animal/models.py b/hawc/apps/animal/models.py index 2220ec0886..14a0164d58 100644 --- a/hawc/apps/animal/models.py +++ b/hawc/apps/animal/models.py @@ -17,7 +17,6 @@ from ..common.helper import ( HAWCDjangoJSONEncoder, SerializerHelper, - cleanHTML, df_move_column, tryParseInt, ) @@ -138,46 +137,6 @@ def is_generational(self): def get_assessment(self): return self.study.get_assessment() - @staticmethod - def flat_complete_header_row(): - return ( - "experiment-id", - "experiment-url", - "experiment-name", - "experiment-type", - "experiment-has_multiple_generations", - "experiment-chemical", - "experiment-cas", - "experiment-dtxsid", - "experiment-chemical_source", - "experiment-purity_available", - "experiment-purity_qualifier", - "experiment-purity", - "experiment-vehicle", - "experiment-guideline_compliance", - "experiment-description", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - ser["type"], - ser["has_multiple_generations"], - ser["chemical"], - ser["cas"], - ser["dtxsid"], - ser["chemical_source"], - ser["purity_available"], - ser["purity_qualifier"], - ser["purity"], - ser["vehicle"], - ser["guideline_compliance"], - cleanHTML(ser["description"]), - ) - @classmethod def delete_caches(cls, ids): Endpoint.delete_caches( @@ -313,48 +272,10 @@ def generation_short(self): def get_generation_short(cls, value) -> str: return "Other" if value == "Ot" else value - @staticmethod - def 
flat_complete_header_row(): - return ( - "animal_group-id", - "animal_group-url", - "animal_group-name", - "animal_group-sex", - "animal_group-animal_source", - "animal_group-lifestage_exposed", - "animal_group-lifestage_assessed", - "animal_group-siblings", - "animal_group-parents", - "animal_group-generation", - "animal_group-comments", - "animal_group-diet", - "species-name", - "strain-name", - ) - @classmethod def get_relation_id(cls, rel): return str(rel["id"]) if rel else None - @classmethod - def flat_complete_data_row(cls, ser): - return ( - ser["id"], - ser["url"], - ser["name"], - ser["sex"], - ser["animal_source"], - ser["lifestage_exposed"], - ser["lifestage_assessed"], - cls.get_relation_id(ser["siblings"]), - "|".join([cls.get_relation_id(p) for p in ser["parents"]]), - ser["generation"], - cleanHTML(ser["comments"]), - ser["diet"], - ser["species"], - ser["strain"], - ) - @classmethod def delete_caches(cls, ids): Endpoint.delete_caches( @@ -471,40 +392,6 @@ def dose_groups(self): def isAnimalsDosed(self, animal_group): return self.dosed_animals == animal_group - @staticmethod - def flat_complete_header_row(): - return ( - "dosing_regime-id", - "dosing_regime-dosed_animals", - "dosing_regime-route_of_exposure", - "dosing_regime-duration_exposure", - "dosing_regime-duration_exposure_text", - "dosing_regime-duration_observation", - "dosing_regime-num_dose_groups", - "dosing_regime-positive_control", - "dosing_regime-negative_control", - "dosing_regime-description", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ( - ser["id"], - AnimalGroup.get_relation_id(ser["dosed_animals"]), - ser["route_of_exposure"], - ser["duration_exposure"], - ser["duration_exposure_text"], - ser["duration_observation"], - ser["num_dose_groups"], - ser["positive_control"], - ser["negative_control"], - cleanHTML(ser["description"]), - ) - if ser - else (None for _ in range(10)) - ) - def can_delete(self) -> bool: # can delete only if no animals others than 
those dosed are related return self.animalgroup_set.exclude(id=self.dosed_animals_id).count() == 0 @@ -542,21 +429,6 @@ class Meta: def __str__(self): return f"{self.dose} {self.dose_units}" - @staticmethod - def flat_complete_data_row(ser_full, units, idx): - cols = [] - ser = [v for v in ser_full if v["dose_group_id"] == idx] - for unit in units: - v = None - for s in ser: - if s["dose_units"]["name"] == unit: - v = s["dose"] - break - - cols.append(v) - - return cols - class Endpoint(BaseEndpoint): objects = managers.EndpointManager() @@ -1040,88 +912,6 @@ def dataset_increasing(self): change += resps[i] - resps[0] return change >= 0 - @staticmethod - def flat_complete_header_row(): - return ( - "endpoint-id", - "endpoint-url", - "endpoint-name", - "endpoint-effects", - "endpoint-system", - "endpoint-organ", - "endpoint-effect", - "endpoint-effect_subtype", - "endpoint-name_term_id", - "endpoint-system_term_id", - "endpoint-organ_term_id", - "endpoint-effect_term_id", - "endpoint-effect_subtype_term_id", - "endpoint-litter_effects", - "endpoint-litter_effect_notes", - "endpoint-observation_time", - "endpoint-observation_time_units", - "endpoint-observation_time_text", - "endpoint-data_location", - "endpoint-response_units", - "endpoint-data_type", - "endpoint-variance_type", - "endpoint-confidence_interval", - "endpoint-data_reported", - "endpoint-data_extracted", - "endpoint-values_estimated", - "endpoint-expected_adversity_direction", - "endpoint-monotonicity", - "endpoint-statistical_test", - "endpoint-trend_value", - "endpoint-trend_result", - "endpoint-diagnostic", - "endpoint-power_notes", - "endpoint-results_notes", - "endpoint-endpoint_notes", - "endpoint-additional_fields", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - "|".join([d["name"] for d in ser["effects"]]), - ser["system"], - ser["organ"], - ser["effect"], - ser["effect_subtype"], - ser["name_term"], - ser["system_term"], - 
ser["organ_term"], - ser["effect_term"], - ser["effect_subtype_term"], - ser["litter_effects"], - ser["litter_effect_notes"], - ser["observation_time"], - ser["observation_time_units"], - ser["observation_time_text"], - ser["data_location"], - ser["response_units"], - ser["data_type"], - ser["variance_name"], - ser["confidence_interval"], - ser["data_reported"], - ser["data_extracted"], - ser["values_estimated"], - ser["expected_adversity_direction_text"], - ser["monotonicity"], - ser["statistical_test"], - ser["trend_value"], - ser["trend_result"], - ser["diagnostic"], - ser["power_notes"], - cleanHTML(ser["results_notes"]), - cleanHTML(ser["endpoint_notes"]), - json.dumps(ser["additional_fields"]), - ) - @staticmethod def setMaximumPercentControlChange(ep): """ @@ -1391,44 +1181,6 @@ def getNRangeText(ns): else: return f"{nmin}-{nmax}" - @staticmethod - def flat_complete_header_row(): - return ( - "endpoint_group-id", - "endpoint_group-dose_group_id", - "endpoint_group-n", - "endpoint_group-incidence", - "endpoint_group-response", - "endpoint_group-variance", - "endpoint_group-lower_ci", - "endpoint_group-upper_ci", - "endpoint_group-significant", - "endpoint_group-significance_level", - "endpoint_group-treatment_effect", - "endpoint_group-NOEL", - "endpoint_group-LOEL", - "endpoint_group-FEL", - ) - - @staticmethod - def flat_complete_data_row(ser, endpoint): - return ( - ser["id"], - ser["dose_group_id"], - ser["n"], - ser["incidence"], - ser["response"], - ser["variance"], - ser["lower_ci"], - ser["upper_ci"], - ser["significant"], - ser["significance_level"], - ser["treatment_effect"], - ser["dose_group_id"] == endpoint["NOEL"], - ser["dose_group_id"] == endpoint["LOEL"], - ser["dose_group_id"] == endpoint["FEL"], - ) - reversion.register(Experiment) reversion.register(AnimalGroup) diff --git a/hawc/apps/assessment/exports.py b/hawc/apps/assessment/exports.py index fb40647421..8aee397ebe 100644 --- a/hawc/apps/assessment/exports.py +++ 
b/hawc/apps/assessment/exports.py @@ -1,9 +1,41 @@ import pandas as pd +from django.conf import settings +from ..common.exports import ModelExport from ..common.helper import FlatFileExporter +from ..common.models import sql_format from .models import AssessmentValue +class DSSToxExport(ModelExport): + def get_value_map(self): + return { + "dtxsid": "dtxsid", + "dashboard_url": "dashboard_url", + "img_url": "img_url", + "content": "content", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + img_url_str = ( + f"https://api-ccte.epa.gov/chemical/file/image/search/by-dtxsid/{{}}?x-api-key={settings.CCTE_API_KEY}" + if settings.CCTE_API_KEY + else "https://comptox.epa.gov/dashboard-api/ccdapp1/chemical-files/image/by-dtxsid/{}" + ) + return { + "dashboard_url": sql_format( + "https://comptox.epa.gov/dashboard/dsstoxdb/results?search={}", + query_prefix + "dtxsid", + ), + "img_url": sql_format(img_url_str, query_prefix + "dtxsid"), + } + + def prepare_df(self, df): + return self.format_time(df) + + class ValuesListExport(FlatFileExporter): def build_df(self) -> pd.DataFrame: return AssessmentValue.objects.get_df() diff --git a/hawc/apps/common/exports.py b/hawc/apps/common/exports.py index 6693a06ad2..0ae15eda61 100644 --- a/hawc/apps/common/exports.py +++ b/hawc/apps/common/exports.py @@ -1,5 +1,6 @@ import pandas as pd from django.db.models import QuerySet +from django.utils import timezone from .helper import FlatExport @@ -14,18 +15,10 @@ def __init__( include: tuple[str, ...] | None = None, exclude: tuple[str, ...] | None = None, ): - """Instantiate an exporter instance for a given django model. - - Args: - key_prefix (str, optional): The model name to prepend to data frame columns. - query_prefix (str, optional): The model prefix in the ORM. - include (tuple | None, optional): If included, only these items are added. - exclude (tuple | None, optional): If specified, items are removed from base. 
- """ self.key_prefix = key_prefix + "-" if key_prefix else key_prefix self.query_prefix = query_prefix + "__" if query_prefix else query_prefix - self.include = (key_prefix + field for field in include) if include else tuple() - self.exclude = (key_prefix + field for field in exclude) if exclude else tuple() + self.include = tuple(self.key_prefix + field for field in include) if include else tuple() + self.exclude = tuple(self.key_prefix + field for field in exclude) if exclude else tuple() @property def value_map(self) -> dict: @@ -153,6 +146,15 @@ def prepare_df(self, df: pd.DataFrame) -> pd.DataFrame: """ return df + def format_time(self, df: pd.DataFrame) -> pd.DataFrame: + if df.shape[0] == 0: + return df + tz = timezone.get_default_timezone() + for key in [self.get_column_name("created"), self.get_column_name("last_updated")]: + if key in df.columns and not df[key].isnull().all(): + df[key] = df[key].dt.tz_convert(tz).dt.strftime("%Y-%m-%dT%H:%M:%S.%f%z") + return df + def get_df(self, qs: QuerySet) -> pd.DataFrame: """Get dataframe export from queryset. @@ -208,13 +210,16 @@ def get_df(self, qs: QuerySet) -> pd.DataFrame: df = module.prepare_df(df) return df + @classmethod + def build_metadata(cls, df: pd.DataFrame) -> pd.DataFrame | None: + return None + @classmethod def flat_export(cls, qs: QuerySet, filename: str) -> FlatExport: """Return an instance of a FlatExport. 
- Args: qs (QuerySet): the initial QuerySet filename (str): the filename for the export """ df = cls().get_df(qs) - return FlatExport(df=df, filename=filename) + return FlatExport(df=df, filename=filename, metadata=cls.build_metadata(df)) diff --git a/hawc/apps/common/helper.py b/hawc/apps/common/helper.py index 9a06d064a7..b951f81dfe 100644 --- a/hawc/apps/common/helper.py +++ b/hawc/apps/common/helper.py @@ -142,22 +142,61 @@ def map_enum(df: pd.DataFrame, field: str, choices: Choices, replace: bool = Fal df.rename(columns={key: field}, inplace=True) -def df_move_column(df: pd.DataFrame, target: str, after: str | None = None) -> pd.DataFrame: +def reorder_list(items: list, target: Any, after: Any | None = None, n_cols: int = 1) -> list: + """Returns a copy of a list with elements reordered. + + Args: + items (list): a list of items + target (Any): the key to move + after (Any | None, default None): the key to move target after. + n_cols (int, default 1): the number of sequential targets to move. + + Raises: + NotImplementedError: if `after` is one of the items being moved + + Returns: + list: the reordered items; the input list itself when no move is needed + """ + target_index = items.index(target) + target_index_max = target_index + n_cols + insert_index = (-1 if after is None else items.index(after)) + 1 + + if target_index == insert_index: + return items + elif target_index > (insert_index): + return ( + items[:insert_index] + + items[target_index:target_index_max] + + items[insert_index:target_index] + + items[target_index_max:] + ) + elif target_index_max < insert_index: + return ( + items[:target_index] + + items[target_index_max:insert_index] + + items[target_index:target_index_max] + + items[insert_index:] + ) + else: + raise NotImplementedError("Unreachable code") + + +def df_move_column( + df: pd.DataFrame, target: str, after: str | None = None, n_cols: int = 1 +) -> pd.DataFrame: """Move target column after another column. Args: df (pd.DataFrame): The dataframe to modify.
target (str): Name of the column to move - after (Optional[str], optional): Name of column to move after; if None, puts first. + after (Optional[str], optional): Name of column to move after; if None, puts first + n_cols (int): Number of target columns to move; defaults to 1 Returns: pd.DataFrame: The mutated dataframe. """ - cols = df.columns.tolist() - target_name = cols.pop(cols.index(target)) - insert_index = cols.index(after) + 1 if after else 0 - cols.insert(insert_index, target_name) - return df[cols] + new_cols = reorder_list(df.columns.tolist(), target, after, n_cols) + return df[new_cols] def url_query(path: str, query: dict) -> str: @@ -548,3 +587,16 @@ def get_current_request() -> HttpRequest: def get_current_user(): """Returns the current request user""" return get_current_request().user + + +def unique_text_list(items: list[str]) -> list[str]: + """Return a list of unique items in a text list""" + items = items.copy() + duplicates = {} + for i, item in enumerate(items): + if item in duplicates: + duplicates[item] += 1 + items[i] = f"{item} ({duplicates[item]})" + else: + duplicates[item] = 1 + return items diff --git a/hawc/apps/common/models.py b/hawc/apps/common/models.py index be62f288d7..90322922dc 100644 --- a/hawc/apps/common/models.py +++ b/hawc/apps/common/models.py @@ -508,19 +508,21 @@ def include_related( return queryset | queryset.model.objects.filter(filters) -def sql_display(name: str, Choice: type[Choices]) -> Case: - """Create a annotation to return the display name via SQL +def sql_display(name: str, choice: type[Choices] | dict, default="?") -> Case: + """Create an annotation to return the display name via SQL Args: name (str): the field name - Choice (type[Choices]): a choice field + choice (type[Choices]): a choice field or dict of choices + default: default value if display value is not found Returns: Case: the case statement for use in an annotation """ + choices = choice.items() if isinstance(choice, dict) else choice.choices 
return Case( - *(When(**{name: key, "then": Value(value)}) for key, value in Choice.choices), - default=Value("?"), + *(When(**{name: key, "then": Value(value)}) for key, value in choices), + default=Value(default), ) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index befea2868c..62642961b8 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -1,266 +1,654 @@ +import math + +import pandas as pd +from django.db.models import Case, Q, When +from scipy.stats import t + +from ..common.exports import Exporter, ModelExport from ..common.helper import FlatFileExporter -from ..materialized.models import FinalRiskOfBiasScore -from ..study.models import Study -from . import models +from ..common.models import sql_display, sql_format, str_m2m +from ..materialized.exports import get_final_score_df +from ..study.exports import StudyExport +from . import constants, models -class OutcomeComplete(FlatFileExporter): - def _get_header_row(self): - header = [] - header.extend(Study.flat_complete_header_row()) - header.extend(models.StudyPopulation.flat_complete_header_row()) - header.extend(models.Outcome.flat_complete_header_row()) - header.extend(models.Exposure.flat_complete_header_row()) - header.extend(models.ComparisonSet.flat_complete_header_row()) - header.extend(models.Result.flat_complete_header_row()) - header.extend(models.Group.flat_complete_header_row()) - header.extend(models.GroupResult.flat_complete_header_row()) - return header - - def _get_data_rows(self): - rows = [] - identifiers_df = Study.identifiers_df(self.queryset, "study_population__study_id") - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [] - row.extend( - Study.flat_complete_data_row(ser["study_population"]["study"], identifiers_df) +def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): + mean = low = high = None + + if mu_1 and mu_2 and mu_1 != 0: + mean = (mu_2 - mu_1) / mu_1 * 100.0 + if sd_1 and sd_2 and n_1 and n_2: + sd = 
math.sqrt( + pow(mu_1, -2) + * ((pow(sd_2, 2) / n_2) + (pow(mu_2, 2) * pow(sd_1, 2)) / (n_1 * pow(mu_1, 2))) ) - row.extend(models.StudyPopulation.flat_complete_data_row(ser["study_population"])) - row.extend(models.Outcome.flat_complete_data_row(ser)) - for res in ser["results"]: - row_copy = list(row) - row_copy.extend( - models.Exposure.flat_complete_data_row(res["comparison_set"]["exposure"]) - ) - row_copy.extend(models.ComparisonSet.flat_complete_data_row(res["comparison_set"])) - row_copy.extend(models.Result.flat_complete_data_row(res)) - for rg in res["results"]: - row_copy2 = list(row_copy) - row_copy2.extend(models.Group.flat_complete_data_row(rg["group"])) - row_copy2.extend(models.GroupResult.flat_complete_data_row(rg)) - rows.append(row_copy2) - return rows + ci = (1.96 * sd) * 100 + rng = sorted([mean - ci, mean + ci]) + low = rng[0] + high = rng[1] + return mean, low, high -class OutcomeDataPivot(FlatFileExporter): - def _get_header_row(self): - if self.queryset.first() is None: - self.rob_headers, self.rob_data = {}, {} - else: - outcome_ids = set(self.queryset.values_list("id", flat=True)) - self.rob_headers, self.rob_data = FinalRiskOfBiasScore.get_dp_export( - self.queryset.first().assessment_id, - outcome_ids, - "epi", - ) - headers = [ - "study id", - "study name", - "study identifier", - "study published", - "study population id", - "study population name", - "study population age profile", - "study population source", - "design", - "outcome id", - "outcome name", - "outcome system", - "outcome effect", - "outcome effect subtype", - "diagnostic", - "age of outcome measurement", - "tags", +class StudyPopulationExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "design": "design_display", + "age_profile": "age_profile", + "source": "source", + "countries": "countries__name", + "region": "region", + "state": "state", + "eligible_n": "eligible_n", + "invited_n": "invited_n", + 
"participant_n": "participant_n", + "inclusion_criteria": "inclusion_criteria", + "exclusion_criteria": "exclusion_criteria", + "confounding_criteria": "confounding_criteria", + "comments": "comments", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/study-population/{}/", query_prefix + "id"), # hardcoded URL + "design_display": sql_display(query_prefix + "design", constants.Design), + "countries__name": str_m2m(query_prefix + "countries__name"), + "inclusion_criteria": str_m2m( + query_prefix + "spcriteria__criteria__description", + filter=Q(**{query_prefix + "spcriteria__criteria_type": constants.CriteriaType.I}), + ), + "exclusion_criteria": str_m2m( + query_prefix + "spcriteria__criteria__description", + filter=Q(**{query_prefix + "spcriteria__criteria_type": constants.CriteriaType.E}), + ), + "confounding_criteria": str_m2m( + query_prefix + "spcriteria__criteria__description", + filter=Q(**{query_prefix + "spcriteria__criteria_type": constants.CriteriaType.C}), + ), + } + + def prepare_df(self, df): + return self.format_time(df) + + +class OutcomeExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "effects": "effects__name", + "system": "system", + "effect": "effect", + "effect_subtype": "effect_subtype", + "diagnostic": "diagnostic_display", + "diagnostic_description": "diagnostic_description", + "age_of_measurement": "age_of_measurement", + "outcome_n": "outcome_n", + "summary": "summary", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/outcome/{}/", query_prefix + "id"), # hardcoded URL + "effects__name": str_m2m(query_prefix + "effects__name"), + "diagnostic_display": sql_display(query_prefix + "diagnostic", constants.Diagnostic), + } + + def prepare_df(self, df): + return self.format_time(df) + + +class 
ExposureExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "inhalation": "inhalation", + "dermal": "dermal", + "oral": "oral", + "in_utero": "in_utero", + "iv": "iv", + "unknown_route": "unknown_route", + "measured": "measured", + "metric": "metric", + "metric_units_id": "metric_units__id", + "metric_units_name": "metric_units__name", + "metric_description": "metric_description", + "analytical_method": "analytical_method", + "sampling_period": "sampling_period", + "age_of_exposure": "age_of_exposure", + "duration": "duration", + "n": "n", + "exposure_distribution": "exposure_distribution", + "description": "description", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/exposure/{}/", query_prefix + "id"), # hardcoded URL + } + + def prepare_df(self, df): + return self.format_time(df) + + +class ComparisonSetExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "url": "url", + "name": "name", + "description": "description", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/comparison-set/{}/", query_prefix + "id"), # hardcoded URL + } + + def prepare_df(self, df): + return self.format_time(df) + + +class ResultMetricExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "metric", + "abbreviation": "abbreviation", + } + + +class ResultExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "name", + "metric_description": "metric_description", + "metric_units": "metric_units", + "data_location": "data_location", + "population_description": "population_description", + "dose_response": "dose_response_display", + "dose_response_details": "dose_response_details", + "prevalence_incidence": "prevalence_incidence", + "statistical_power": 
"statistical_power_display", + "statistical_power_details": "statistical_power_details", + "statistical_test_results": "statistical_test_results", + "trend_test": "trend_test", + "adjustment_factors": "adjustment_factors", + "adjustment_factors_considered": "adjustment_factors_considered", + "estimate_type": "estimate_type_display", + "variance_type": "variance_type_display", + "ci_units": "ci_units", + "comments": "comments", + "created": "created", + "last_updated": "last_updated", + "tags": "tags", + } + + def get_annotation_map(self, query_prefix): + return { + "dose_response_display": sql_display( + query_prefix + "dose_response", constants.DoseResponse + ), + "adjustment_factors": str_m2m( + query_prefix + "resfactors__adjustment_factor__description", + filter=Q(**{query_prefix + "resfactors__included_in_final_model": True}), + ), + "adjustment_factors_considered": str_m2m( + query_prefix + "resfactors__adjustment_factor__description", + filter=Q(**{query_prefix + "resfactors__included_in_final_model": False}), + ), + "statistical_power_display": sql_display( + query_prefix + "statistical_power", constants.StatisticalPower + ), + "estimate_type_display": sql_display( + query_prefix + "estimate_type", constants.EstimateType + ), + "variance_type_display": sql_display( + query_prefix + "variance_type", constants.VarianceType + ), + "tags": str_m2m(query_prefix + "resulttags__name"), + } + + def prepare_df(self, df): + return self.format_time(df) + + +class GroupExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "group_id": "group_id", + "name": "name", + "numeric": "numeric", + "comparative_name": "comparative_name", + "sex": "sex_display", + "ethnicities": "ethnicities", + "eligible_n": "eligible_n", + "invited_n": "invited_n", + "participant_n": "participant_n", + "isControl": "isControl", + "comments": "comments", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + 
"sex_display": sql_display(query_prefix + "sex", constants.Sex), + "ethnicities": str_m2m(query_prefix + "ethnicities__name"), + } + + def prepare_df(self, df): + return self.format_time(df) + + +class GroupResultExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "n": "n", + "estimate": "estimate", + "variance": "variance", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "lower_range": "lower_range", + "upper_range": "upper_range", + "lower_bound_interval": "lower_bound_interval", + "upper_bound_interval": "upper_bound_interval", + "p_value_qualifier": "p_value_qualifier_display", + "p_value": "p_value", + "is_main_finding": "is_main_finding", + "main_finding_support": "main_finding_support_display", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "lower_bound_interval": Case( + When(**{query_prefix + "lower_ci": None}, then=query_prefix + "lower_range"), + default=query_prefix + "lower_ci", + ), + "upper_bound_interval": Case( + When(**{query_prefix + "upper_ci": None}, then=query_prefix + "upper_range"), + default=query_prefix + "upper_ci", + ), + "p_value_qualifier_display": sql_display( + query_prefix + "p_value_qualifier", constants.PValueQualifier + ), + "main_finding_support_display": sql_display( + query_prefix + "main_finding_support", constants.MainFinding + ), + } + + def prepare_df(self, df): + return self.format_time(df) + + +class CentralTendencyExport(ModelExport): + def get_value_map(self): + return { + "estimate": "estimate", + "estimate_type": "estimate_type_display", + "variance": "variance", + "variance_type": "variance_type_display", + "lower_bound_interval": "lower_bound_interval", + "upper_bound_interval": "upper_bound_interval", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "lower_range": "lower_range", + "upper_range": "upper_range", + } + + def get_annotation_map(self, query_prefix): + return { + "estimate_type_display": 
sql_display( + query_prefix + "estimate_type", constants.EstimateType + ), + "variance_type_display": sql_display( + query_prefix + "variance_type", constants.VarianceType + ), + "lower_bound_interval": Case( + When(**{query_prefix + "lower_ci": None}, then=query_prefix + "lower_range"), + default=query_prefix + "lower_ci", + ), + "upper_bound_interval": Case( + When(**{query_prefix + "upper_ci": None}, then=query_prefix + "upper_range"), + default=query_prefix + "upper_ci", + ), + } + + +class EpiExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport("study", "study_population__study"), + StudyPopulationExport("sp", "study_population"), + OutcomeExport("outcome", ""), + ExposureExport("exposure", "results__comparison_set__exposure"), + ComparisonSetExport("cs", "results__comparison_set"), + ResultMetricExport("metric", "results__metric"), + ResultExport("result", "results", exclude=("tags",)), + GroupExport("group", "results__results__group"), + GroupResultExport("result_group", "results__results"), ] - headers.extend(list(self.rob_headers.values())) - - headers.extend( - [ - "comparison set id", - "comparison set name", - "exposure id", - "exposure name", - "exposure metric", - "exposure measured", - "dose units", - "age of exposure", - "exposure estimate", - "exposure estimate type", - "exposure variance", - "exposure variance type", - "exposure lower bound interval", - "exposure upper bound interval", - "exposure lower ci", - "exposure upper ci", - "exposure lower range", - "exposure upper range", - "result id", - "result name", - "result population description", - "result tags", - "statistical metric", - "statistical metric abbreviation", - "statistical metric description", - "result summary", - "dose response", - "statistical power", - "statistical test results", - "CI units", - "exposure group order", - "exposure group name", - "exposure group comparison name", - "exposure group numeric", - "Reference/Exposure group", -
"Result, summary numerical", - "key", - "result group id", - "N", - "estimate", - "lower CI", - "upper CI", - "lower range", - "upper range", - "lower bound interval", - "upper bound interval", - "variance", - "statistical significance", - "statistical significance (numeric)", - "main finding", - "main finding support", - "percent control mean", - "percent control low", - "percent control high", - ] + +class OutcomeComplete(FlatFileExporter): + """ + Returns a complete export of all data required to rebuild the + epidemiological meta-result study type from scratch. + """ + + def build_df(self) -> pd.DataFrame: + return EpiExporter().get_df(self.queryset) + + +class EpiDataPivotExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "study_population__study", + include=("id", "short_citation", "study_identifier", "published"), + ), + StudyPopulationExport( + "sp", "study_population", include=("id", "name", "age_profile", "source", "design") + ), + OutcomeExport( + "outcome", + "", + include=( + "id", + "name", + "system", + "effect", + "effect_subtype", + "diagnostic", + "age_of_measurement", + "effects", + ), + ), + ComparisonSetExport("cs", "results__comparison_set", include=("id", "name")), + ExposureExport( + "exposure", + "results__comparison_set__exposure", + include=( + "id", + "name", + "metric", + "measured", + "metric_units_name", + "age_of_exposure", + ), + ), + CentralTendencyExport( + "ct", + "results__comparison_set__exposure__central_tendencies", + include=( + "estimate", + "estimate_type", + "variance", + "variance_type", + "lower_bound_interval", + "upper_bound_interval", + "lower_ci", + "upper_ci", + "lower_range", + "upper_range", + ), + ), + ResultExport( + "result", + "results", + include=( + "id", + "name", + "population_description", + "tags", + "metric_description", + "comments", + "dose_response", + "statistical_power", + "statistical_test_results", + "ci_units", + "estimate_type", +
"variance_type", + ), + ), + ResultMetricExport("metric", "results__metric", include=("name", "abbreviation")), + GroupExport( + "group", + "results__results__group", + include=("group_id", "name", "comparative_name", "numeric", "isControl"), + ), + GroupResultExport( + "result_group", + "results__results", + include=( + "id", + "n", + "estimate", + "lower_ci", + "upper_ci", + "lower_range", + "upper_range", + "lower_bound_interval", + "upper_bound_interval", + "variance", + "p_value", + "p_value_qualifier", + "is_main_finding", + "main_finding_support", + ), + ), + ] + + +class OutcomeDataPivot(FlatFileExporter): + def _add_ci(self, df: pd.DataFrame) -> pd.DataFrame: + # if CI are not reported, calculate from mean/variance estimates. This code is identical + # to `GroupResult.getConfidenceIntervals`, but applied to this data frame + def _calc_cis(row): + if ( + row["result_group-lower_ci"] is None + and row["result_group-upper_ci"] is None + and row["result_group-n"] is not None + and row["result_group-estimate"] is not None + and row["result_group-variance"] is not None + and row["result_group-n"] > 0 + ): + n = row["result_group-n"] + est = row["result_group-estimate"] + var = row["result_group-variance"] + z = t.ppf(0.975, max(n - 1, 1)) + change = None + + if row["result-variance_type"] == "SD": + change = z * var / math.sqrt(n) + elif row["result-variance_type"] in ("SE", "SEM"): + change = z * var + + if change is not None: + return est - change, est + change + + return row["result_group-lower_ci"], row["result_group-upper_ci"] + + df[["result_group-lower_ci", "result_group-upper_ci"]] = df.apply( + _calc_cis, axis=1, result_type="expand" ) + return df + + def _add_percent_control(self, df: pd.DataFrame) -> pd.DataFrame: + def _get_stdev(x: pd.Series): + return models.GroupResult.stdev( + x["result-variance_type"], x["result_group-variance"], x["result_group-n"] + ) - return headers - - def _get_data_rows(self): - rows = [] - for obj in self.queryset: - ser 
= obj.get_json(json_encode=False) - row = [ - ser["study_population"]["study"]["id"], - ser["study_population"]["study"]["short_citation"], - ser["study_population"]["study"]["study_identifier"], - ser["study_population"]["study"]["published"], - ser["study_population"]["id"], - ser["study_population"]["name"], - ser["study_population"]["age_profile"], - ser["study_population"]["source"], - ser["study_population"]["design"], - ser["id"], - ser["name"], - ser["system"], - ser["effect"], - ser["effect_subtype"], - ser["diagnostic"], - ser["age_of_measurement"], - self.get_flattened_tags(ser, "effects"), - ] - outcome_robs = [ - self.rob_data[(ser["id"], metric_id)] for metric_id in self.rob_headers.keys() - ] - row.extend(outcome_robs) - - for res in ser["results"]: - row_copy = list(row) - - # comparison set - row_copy.extend([res["comparison_set"]["id"], res["comparison_set"]["name"]]) - - # exposure (may be missing) - if res["comparison_set"]["exposure"]: - row_copy.extend( - [ - res["comparison_set"]["exposure"]["id"], - res["comparison_set"]["exposure"]["name"], - res["comparison_set"]["exposure"]["metric"], - res["comparison_set"]["exposure"]["measured"], - res["comparison_set"]["exposure"]["metric_units"]["name"], - res["comparison_set"]["exposure"]["age_of_exposure"], - ] + def _apply_results(_df1: pd.DataFrame): + controls = _df1.loc[_df1["group-isControl"] == True] # noqa: E712 + control = _df1.iloc[0] if controls.empty else controls.iloc[0] + n_1 = control["result_group-n"] + mu_1 = control["result_group-estimate"] + sd_1 = _get_stdev(control) + + def _apply_result_groups(_df2: pd.DataFrame): + row = _df2.iloc[0] + if control["result-estimate_type"] in ["median", "mean"] and control[ + "result-variance_type" + ] in ["SD", "SE", "SEM"]: + n_2 = row["result_group-n"] + mu_2 = row["result_group-estimate"] + sd_2 = _get_stdev(row) + mean, low, high = percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2) + return pd.DataFrame( + [[mean, low, high]], + columns=[ + 
"percent control mean", + "percent control low", + "percent control high", + ], + index=[row["result_group-id"]], ) + return pd.DataFrame( + [], + columns=[ + "percent control mean", + "percent control low", + "percent control high", + ], + ) - num_rows_for_ct = len(res["comparison_set"]["exposure"]["central_tendencies"]) - if num_rows_for_ct == 0: - row_copy.extend(["-"] * 10) - self.addOutcomesAndGroupsToRowAndAppend(rows, res, ser, row_copy) - else: - for ct in res["comparison_set"]["exposure"]["central_tendencies"]: - row_copy_ct = list(row_copy) - row_copy_ct.extend( - [ - ct["estimate"], - ct["estimate_type"], - ct["variance"], - ct["variance_type"], - ct["lower_bound_interval"], - ct["upper_bound_interval"], - ct["lower_ci"], - ct["upper_ci"], - ct["lower_range"], - ct["upper_range"], - ] - ) - self.addOutcomesAndGroupsToRowAndAppend(rows, res, ser, row_copy_ct) - - else: - row_copy.extend(["-"] * (6 + 10)) # exposure + exposure.central_tendencies - self.addOutcomesAndGroupsToRowAndAppend(rows, res, ser, row_copy) - - return rows - - def addOutcomesAndGroupsToRowAndAppend(self, rows, res, ser, row): - # outcome details - row.extend( - [ - res["id"], - res["name"], - res["population_description"], - self.get_flattened_tags(res, "resulttags"), - res["metric"]["metric"], - res["metric"]["abbreviation"], - res["metric_description"], - res["comments"], - res["dose_response"], - res["statistical_power"], - res["statistical_test_results"], - res["ci_units"], - ] + rgs = _df1.groupby("result_group-id", group_keys=False) + return rgs.apply(_apply_result_groups) + + results = df.groupby("result-id", group_keys=False) + computed_df = results.apply(_apply_results) + # empty groups may return original columns, so remove them + computed_df = computed_df[computed_df.columns.difference(df.columns)] + return df.join(computed_df, on="result_group-id").drop( + columns=["result-estimate_type", "result-variance_type", "group-isControl"] ) - for rg in res["results"]: - row_copy = 
list(row) - row_copy.extend( - [ - rg["group"]["group_id"], - rg["group"]["name"], - rg["group"]["comparative_name"], - rg["group"]["numeric"], - f'{ser["study_population"]["study"]["short_citation"]} ({rg["group"]["name"]}, n={rg["n"]})', - f'{rg["estimate"]} ({rg["lower_ci"]} - {rg["upper_ci"]})', - rg["id"], - rg["id"], # repeat for data-pivot key - rg["n"], - rg["estimate"], - rg["lower_ci"], - rg["upper_ci"], - rg["lower_range"], - rg["upper_range"], - rg["lower_bound_interval"], - rg["upper_bound_interval"], - rg["variance"], - rg["p_value_text"], - rg["p_value"], - rg["is_main_finding"], - rg["main_finding_support"], - rg["percentControlMean"], - rg["percentControlLow"], - rg["percentControlHigh"], - ] - ) - rows.append(row_copy) + def build_df(self) -> pd.DataFrame: + df = EpiDataPivotExporter().get_df(self.queryset.order_by("id", "results__results")) + if obj := self.queryset.first(): + outcome_ids = list(df["outcome-id"].unique()) + rob_df = get_final_score_df(obj.assessment_id, outcome_ids, "epi") + df = df.join(rob_df, on="outcome-id") + + df["Reference/Exposure group"] = ( + df["study-short_citation"] + + " (" + + df["group-name"] + + ", n=" + + df["result_group-n"].astype(str) + + ")" + ) + df["Result, summary numerical"] = ( + df["result_group-estimate"].astype(str) + + " (" + + df["result_group-lower_ci"].astype(str) + + " - " + + df["result_group-upper_ci"].astype(str) + + ")" + ) + df["key"] = df["result_group-id"] + df["statistical significance"] = df.apply( + lambda x: x["result_group-p_value_qualifier"] + if pd.isna(x["result_group-p_value"]) + else f"{x['result_group-p_value']:g}" + if x["result_group-p_value_qualifier"] in ["=", "-", "n.s."] + else f"{x['result_group-p_value_qualifier']}{x['result_group-p_value']:g}", + axis="columns", + ) + df = df.drop(columns="result_group-p_value_qualifier") + + df = self._add_ci(df) + df = self._add_percent_control(df) + + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": 
"study name", + "study-study_identifier": "study identifier", + "study-published": "study published", + "sp-id": "study population id", + "sp-name": "study population name", + "sp-age_profile": "study population age profile", + "sp-source": "study population source", + "sp-design": "design", + "outcome-id": "outcome id", + "outcome-name": "outcome name", + "outcome-system": "outcome system", + "outcome-effect": "outcome effect", + "outcome-effect_subtype": "outcome effect subtype", + "outcome-diagnostic": "diagnostic", + "outcome-age_of_measurement": "age of outcome measurement", + "outcome-effects": "tags", + "cs-id": "comparison set id", + "cs-name": "comparison set name", + "exposure-id": "exposure id", + "exposure-name": "exposure name", + "exposure-metric": "exposure metric", + "exposure-measured": "exposure measured", + "exposure-metric_units_name": "dose units", + "exposure-age_of_exposure": "age of exposure", + "ct-estimate": "exposure estimate", + "ct-estimate_type": "exposure estimate type", + "ct-variance": "exposure variance", + "ct-variance_type": "exposure variance type", + "ct-lower_bound_interval": "exposure lower bound interval", + "ct-upper_bound_interval": "exposure upper bound interval", + "ct-lower_ci": "exposure lower ci", + "ct-upper_ci": "exposure upper ci", + "ct-lower_range": "exposure lower range", + "ct-upper_range": "exposure upper range", + "result-id": "result id", + "result-name": "result name", + "result-population_description": "result population description", + "result-tags": "result tags", + "metric-name": "statistical metric", + "metric-abbreviation": "statistical metric abbreviation", + "result-metric_description": "statistical metric description", + "result-comments": "result summary", + "result-dose_response": "dose response", + "result-statistical_power": "statistical power", + "result-statistical_test_results": "statistical test results", + "result-ci_units": "CI units", + "group-group_id": "exposure group order", + 
"group-name": "exposure group name", + "group-comparative_name": "exposure group comparison name", + "group-numeric": "exposure group numeric", + "result_group-id": "result group id", + "result_group-n": "N", + "result_group-estimate": "estimate", + "result_group-lower_ci": "lower CI", + "result_group-upper_ci": "upper CI", + "result_group-lower_range": "lower range", + "result_group-upper_range": "upper range", + "result_group-lower_bound_interval": "lower bound interval", + "result_group-upper_bound_interval": "upper bound interval", + "result_group-variance": "variance", + "result_group-p_value": "statistical significance (numeric)", + "result_group-is_main_finding": "main finding", + "result_group-main_finding_support": "main finding support", + } + ) + + return df diff --git a/hawc/apps/epi/models.py b/hawc/apps/epi/models.py index 88f4a4dccf..a70edec971 100644 --- a/hawc/apps/epi/models.py +++ b/hawc/apps/epi/models.py @@ -206,57 +206,6 @@ class StudyPopulation(models.Model): BREADCRUMB_PARENT = "study" - @staticmethod - def flat_complete_header_row(): - return ( - "sp-id", - "sp-url", - "sp-name", - "sp-design", - "sp-age_profile", - "sp-source", - "sp-countries", - "sp-region", - "sp-state", - "sp-eligible_n", - "sp-invited_n", - "sp-participant_n", - "sp-inclusion_criteria", - "sp-exclusion_criteria", - "sp-confounding_criteria", - "sp-comments", - "sp-created", - "sp-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - def getCriteriaList(lst, filt): - return "|".join( - [d["description"] for d in [d for d in lst if d["criteria_type"] == filt]] - ) - - return ( - ser["id"], - ser["url"], - ser["name"], - ser["design"], - ser["age_profile"], - ser["source"], - "|".join([c["name"] for c in ser["countries"]]), - ser["region"], - ser["state"], - ser["eligible_n"], - ser["invited_n"], - ser["participant_n"], - getCriteriaList(ser["criteria"], "Inclusion"), - getCriteriaList(ser["criteria"], "Exclusion"), - getCriteriaList(ser["criteria"], 
"Confounding"), - ser["comments"], - ser["created"], - ser["last_updated"], - ) - class Meta: ordering = ("name",) @@ -379,44 +328,6 @@ def get_absolute_url(self): def can_create_sets(self): return not self.study_population.can_create_sets() - @staticmethod - def flat_complete_header_row(): - return ( - "outcome-id", - "outcome-url", - "outcome-name", - "outcome-effects", - "outcome-system", - "outcome-effect", - "outcome-effect_subtype", - "outcome-diagnostic", - "outcome-diagnostic_description", - "outcome-age_of_measurement", - "outcome-outcome_n", - "outcome-summary", - "outcome-created", - "outcome-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - "|".join([str(d["name"]) for d in ser["effects"]]), - ser["system"], - ser["effect"], - ser["effect_subtype"], - ser["diagnostic"], - ser["diagnostic_description"], - ser["age_of_measurement"], - ser["outcome_n"], - ser["summary"], - ser["created"], - ser["last_updated"], - ) - def get_study(self): return self.study_population.get_study() @@ -486,28 +397,6 @@ def get_assessment(self): def __str__(self): return self.name - @staticmethod - def flat_complete_header_row(): - return ( - "cs-id", - "cs-url", - "cs-name", - "cs-description", - "cs-created", - "cs-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - ser["description"], - ser["created"], - ser["last_updated"], - ) - def get_study(self): if self.study_population: return self.study_population.get_study() @@ -590,44 +479,6 @@ def get_assessment(self): def __str__(self): return self.name - @staticmethod - def flat_complete_header_row(): - return ( - "group-id", - "group-group_id", - "group-name", - "group-numeric", - "group-comparative_name", - "group-sex", - "group-ethnicities", - "group-eligible_n", - "group-invited_n", - "group-participant_n", - "group-isControl", - "group-comments", - "group-created", - 
"group-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["group_id"], - ser["name"], - ser["numeric"], - ser["comparative_name"], - ser["sex"], - "|".join([d["name"] for d in ser["ethnicities"]]), - ser["eligible_n"], - ser["invited_n"], - ser["participant_n"], - ser["isControl"], - ser["comments"], - ser["created"], - ser["last_updated"], - ) - class Exposure(models.Model): objects = managers.ExposureManager() @@ -771,65 +622,6 @@ def get_absolute_url(self): def delete_caches(cls, ids): SerializerHelper.delete_caches(cls, ids) - @staticmethod - def flat_complete_header_row(): - return ( - "exposure-id", - "exposure-url", - "exposure-name", - "exposure-inhalation", - "exposure-dermal", - "exposure-oral", - "exposure-in_utero", - "exposure-iv", - "exposure-unknown_route", - "exposure-measured", - "exposure-metric", - "exposure-metric_units_id", - "exposure-metric_units_name", - "exposure-metric_description", - "exposure-analytical_method", - "exposure-sampling_period", - "exposure-age_of_exposure", - "exposure-duration", - "exposure-n", - "exposure-exposure_distribution", - "exposure-description", - "exposure-created", - "exposure-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - if ser is None: - ser = {} - units = ser.get("metric_units", {}) - return ( - ser.get("id"), - ser.get("url"), - ser.get("name"), - ser.get("inhalation"), - ser.get("dermal"), - ser.get("oral"), - ser.get("in_utero"), - ser.get("iv"), - ser.get("unknown_route"), - ser.get("measured"), - ser.get("metric"), - units.get("id"), - units.get("name"), - ser.get("metric_description"), - ser.get("analytical_method"), - ser.get("sampling_period"), - ser.get("age_of_exposure"), - ser.get("duration"), - ser.get("n"), - ser.get("exposure_distribution"), - ser.get("description"), - ser.get("created"), - ser.get("last_updated"), - ) - def get_study(self): return self.study_population.get_study() @@ -891,42 +683,6 @@ class Meta: 
def __str__(self): return f"{{CT id={self.id}, exposure={self.exposure}}}" - @staticmethod - def flat_complete_header_row(): - return ( - "central_tendency-id", - "central_tendency-estimate", - "central_tendency-estimate_type", - "central_tendency-variance", - "central_tendency-variance_type", - "central_tendency-lower_ci", - "central_tendency-upper_ci", - "central_tendency-lower_range", - "central_tendency-upper_range", - "central_tendency-description", - "central_tendency-lower_bound_interval", - "central_tendency-upper_bound_interval", - ) - - @staticmethod - def flat_complete_data_row(ser): - if ser is None: - ser = {} - return ( - ser.get("id"), - ser.get("estimate"), - ser.get("estimate_type"), - ser.get("variance"), - ser.get("variance_type"), - ser.get("lower_ci"), - ser.get("upper_ci"), - ser.get("lower_range"), - ser.get("upper_range"), - ser.get("description"), - ser.get("lower_bound_interval"), - ser.get("upper_bound_interval"), - ) - class GroupNumericalDescriptions(models.Model): objects = managers.GroupNumericalDescriptionsManager() @@ -1131,72 +887,6 @@ def get_assessment(self): def get_absolute_url(self): return reverse("epi:result_detail", args=(self.pk,)) - @staticmethod - def flat_complete_header_row(): - return ( - "metric-id", - "metric-name", - "metric-abbreviation", - "result-id", - "result-name", - "result-metric_description", - "result-metric_units", - "result-data_location", - "result-population_description", - "result-dose_response", - "result-dose_response_details", - "result-prevalence_incidence", - "result-statistical_power", - "result-statistical_power_details", - "result-statistical_test_results", - "result-trend_test", - "result-adjustment_factors", - "result-adjustment_factors_considered", - "result-estimate_type", - "result-variance_type", - "result-ci_units", - "result-comments", - "result-created", - "result-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - def getFactorList(lst, isIncluded): - return 
"|".join( - [ - d["description"] - for d in [d for d in lst if d["included_in_final_model"] == isIncluded] - ] - ) - - return ( - ser["metric"]["id"], - ser["metric"]["metric"], - ser["metric"]["abbreviation"], - ser["id"], - ser["name"], - ser["metric_description"], - ser["metric_units"], - ser["data_location"], - ser["population_description"], - ser["dose_response"], - ser["dose_response_details"], - ser["prevalence_incidence"], - ser["statistical_power"], - ser["statistical_power_details"], - ser["statistical_test_results"], - ser["trend_test"], - getFactorList(ser["factors"], True), - getFactorList(ser["factors"], False), - ser["estimate_type"], - ser["variance_type"], - ser["ci_units"], - ser["comments"], - ser["created"], - ser["last_updated"], - ) - def get_study(self): return self.outcome.get_study() @@ -1427,48 +1117,6 @@ def lower_bound_interval(self): def upper_bound_interval(self): return self.upper_range if self.upper_ci is None else self.upper_ci - @staticmethod - def flat_complete_header_row(): - return ( - "result_group-id", - "result_group-n", - "result_group-estimate", - "result_group-variance", - "result_group-lower_ci", - "result_group-upper_ci", - "result_group-lower_range", - "result_group-upper_range", - "result_group-lower_bound_interval", - "result_group-upper_bound_interval", - "result_group-p_value_qualifier", - "result_group-p_value", - "result_group-is_main_finding", - "result_group-main_finding_support", - "result_group-created", - "result_group-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["n"], - ser["estimate"], - ser["variance"], - ser["lower_ci"], - ser["upper_ci"], - ser["lower_range"], - ser["upper_range"], - ser["lower_bound_interval"], - ser["upper_bound_interval"], - ser["p_value_qualifier_display"], - ser["p_value"], - ser["is_main_finding"], - ser["main_finding_support"], - ser["created"], - ser["last_updated"], - ) - @staticmethod def stdev(variance_type, variance, 
n): # calculate stdev given re diff --git a/hawc/apps/epimeta/exports.py b/hawc/apps/epimeta/exports.py index 1c29ede1b2..426a17b1d4 100644 --- a/hawc/apps/epimeta/exports.py +++ b/hawc/apps/epimeta/exports.py @@ -1,6 +1,158 @@ +import pandas as pd + +from ..common.exports import Exporter, ModelExport from ..common.helper import FlatFileExporter -from ..study.models import Study -from . import models +from ..common.models import sql_display, sql_format, str_m2m +from ..epi.exports import ResultMetricExport +from ..study.exports import StudyExport +from . import constants + + +class MetaProtocolExport(ModelExport): + def get_value_map(self): + return { + "pk": "pk", + "url": "url", + "name": "name", + "protocol_type": "protocol_type", + "lit_search_strategy": "lit_search_strategy", + "lit_search_notes": "lit_search_notes", + "lit_search_start_date": "lit_search_start_date", + "lit_search_end_date": "lit_search_end_date", + "total_references": "total_references", + "inclusion_criteria": "inclusion_criteria", + "exclusion_criteria": "exclusion_criteria", + "total_studies_identified": "total_studies_identified", + "notes": "notes", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi-meta/protocol/{}/", query_prefix + "id"), # hardcoded URL + "protocol_type": sql_display(query_prefix + "protocol_type", constants.MetaProtocol), + "lit_search_strategy": sql_display( + query_prefix + "lit_search_strategy", constants.MetaLitSearch + ), + "inclusion_criteria": str_m2m(query_prefix + "inclusion_criteria__description"), + "exclusion_criteria": str_m2m(query_prefix + "exclusion_criteria__description"), + } + + def prepare_df(self, df): + for key in [ + self.get_column_name("lit_search_start_date"), + self.get_column_name("lit_search_end_date"), + ]: + if key in df.columns: + df.loc[:, key] = df[key].apply(lambda x: x.isoformat() if not pd.isna(x) else x) + return df + + +class MetaResultExport(ModelExport): + def get_value_map(self): + 
return { + "pk": "pk", + "url": "url", + "label": "label", + "data_location": "data_location", + "health_outcome": "health_outcome", + "health_outcome_notes": "health_outcome_notes", + "exposure_name": "exposure_name", + "exposure_details": "exposure_details", + "number_studies": "number_studies", + "statistical_metric": "metric__metric", + "statistical_notes": "statistical_notes", + "n": "n", + "estimate": "estimate", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "ci_units": "ci_units", + "heterogeneity": "heterogeneity", + "adjustment_factors": "adjustment_factors_str", + "notes": "notes", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi-meta/result/{}/", query_prefix + "id"), # hardcoded URL + "adjustment_factors_str": str_m2m(query_prefix + "adjustment_factors__description"), + } + + +class SingleResultExport(ModelExport): + def get_value_map(self): + return { + "pk": "pk", + "study": "study_id", + "exposure_name": "exposure_name", + "weight": "weight", + "n": "n", + "estimate": "estimate", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "ci_units": "ci_units", + "notes": "notes", + } + + +class EpiMetaExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport("study", "protocol__study"), + MetaProtocolExport("meta_protocol", "protocol"), + MetaResultExport("meta_result", ""), + SingleResultExport("single_result", "single_results"), + ] + + +class EpiMetaDataPivotExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "protocol__study", + include=( + "id", + "short_citation", + "published", + ), + ), + MetaProtocolExport( + "meta_protocol", + "protocol", + include=( + "pk", + "name", + "protocol_type", + "total_references", + "total_studies_identified", + ), + ), + MetaResultExport( + "meta_result", + "", + include=( + "pk", + "label", + "health_outcome", + "exposure_name", + "number_studies", + "n", + "estimate", + 
"lower_ci", + "upper_ci", + "ci_units", + "heterogeneity", + ), + ), + ResultMetricExport( + "metric", + "metric", + include=( + "name", + "abbreviation", + ), + ), + ] class MetaResultFlatComplete(FlatFileExporter): @@ -9,36 +161,8 @@ class MetaResultFlatComplete(FlatFileExporter): epidemiological meta-result study type from scratch. """ - def _get_header_row(self): - header = [] - header.extend(Study.flat_complete_header_row()) - header.extend(models.MetaProtocol.flat_complete_header_row()) - header.extend(models.MetaResult.flat_complete_header_row()) - header.extend(models.SingleResult.flat_complete_header_row()) - return header - - def _get_data_rows(self): - rows = [] - identifiers_df = Study.identifiers_df(self.queryset, "protocol__study_id") - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [] - row.extend(Study.flat_complete_data_row(ser["protocol"]["study"], identifiers_df)) - row.extend(models.MetaProtocol.flat_complete_data_row(ser["protocol"])) - row.extend(models.MetaResult.flat_complete_data_row(ser)) - - if len(ser["single_results"]) == 0: - # print one-row with no single-results - row.extend([None] * 10) - rows.append(row) - else: - # print each single-result as a new row - for sr in ser["single_results"]: - row_copy = list(row) # clone - row_copy.extend(models.SingleResult.flat_complete_data_row(sr)) - rows.append(row_copy) - - return rows + def build_df(self) -> pd.DataFrame: + return EpiMetaExporter().get_df(self.queryset) class MetaResultFlatDataPivot(FlatFileExporter): @@ -49,60 +173,35 @@ class MetaResultFlatDataPivot(FlatFileExporter): Note: data pivot does not currently include study confidence. Could be added if needed. 
""" - def _get_header_row(self): - return [ - "study id", - "study name", - "study published", - "protocol id", - "protocol name", - "protocol type", - "total references", - "identified references", - "key", - "meta result id", - "meta result label", - "health outcome", - "exposure", - "result references", - "statistical metric", - "statistical metric abbreviation", - "N", - "estimate", - "lower CI", - "upper CI", - "CI units", - "heterogeneity", - ] + def build_df(self) -> pd.DataFrame: + df = EpiMetaDataPivotExporter().get_df(self.queryset) + + df["key"] = df["meta_result-pk"] - def _get_data_rows(self): - rows = [] - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [ - ser["protocol"]["study"]["id"], - ser["protocol"]["study"]["short_citation"], - ser["protocol"]["study"]["published"], - ser["protocol"]["id"], - ser["protocol"]["name"], - ser["protocol"]["protocol_type"], - ser["protocol"]["total_references"], - ser["protocol"]["total_studies_identified"], - ser["id"], # repeat for data-pivot key - ser["id"], - ser["label"], - ser["health_outcome"], - ser["exposure_name"], - ser["number_studies"], - ser["metric"]["metric"], - ser["metric"]["abbreviation"], - ser["n"], - ser["estimate"], - ser["lower_ci"], - ser["upper_ci"], - ser["ci_units"], - ser["heterogeneity"], - ] - rows.append(row) - - return rows + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": "study name", + "study-published": "study published", + "meta_protocol-pk": "protocol id", + "meta_protocol-name": "protocol name", + "meta_protocol-protocol_type": "protocol type", + "meta_protocol-total_references": "total references", + "meta_protocol-total_studies_identified": "identified references", + "meta_result-pk": "meta result id", + "meta_result-label": "meta result label", + "meta_result-health_outcome": "health outcome", + "meta_result-exposure_name": "exposure", + "meta_result-number_studies": "result references", + "metric-name": 
"statistical metric", + "metric-abbreviation": "statistical metric abbreviation", + "meta_result-n": "N", + "meta_result-estimate": "estimate", + "meta_result-lower_ci": "lower CI", + "meta_result-upper_ci": "upper CI", + "meta_result-ci_units": "CI units", + "meta_result-heterogeneity": "heterogeneity", + }, + errors="raise", + ) + return df diff --git a/hawc/apps/epimeta/models.py b/hawc/apps/epimeta/models.py index 61527c5ae2..2426de6cf6 100644 --- a/hawc/apps/epimeta/models.py +++ b/hawc/apps/epimeta/models.py @@ -69,42 +69,6 @@ def get_absolute_url(self): def get_json(self, json_encode=True): return SerializerHelper.get_serialized(self, json=json_encode, from_cache=False) - @staticmethod - def flat_complete_header_row(): - return ( - "meta_protocol-pk", - "meta_protocol-url", - "meta_protocol-name", - "meta_protocol-protocol_type", - "meta_protocol-lit_search_strategy", - "meta_protocol-lit_search_notes", - "meta_protocol-lit_search_start_date", - "meta_protocol-lit_search_end_date", - "meta_protocol-total_references", - "meta_protocol-inclusion_criteria", - "meta_protocol-exclusion_criteria", - "meta_protocol-total_studies_identified", - "meta_protocol-notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - ser["protocol_type"], - ser["lit_search_strategy"], - ser["lit_search_notes"], - ser["lit_search_start_date"], - ser["lit_search_end_date"], - ser["total_references"], - "|".join(ser["inclusion_criteria"]), - "|".join(ser["exclusion_criteria"]), - ser["total_studies_identified"], - ser["notes"], - ) - def get_study(self): return self.study @@ -191,54 +155,6 @@ def get_qs_json(queryset, json_encode=True): else: return results - @staticmethod - def flat_complete_header_row(): - return ( - "meta_result-pk", - "meta_result-url", - "meta_result-label", - "meta_result-data_location", - "meta_result-health_outcome", - "meta_result-health_outcome_notes", - "meta_result-exposure_name", - 
"meta_result-exposure_details", - "meta_result-number_studies", - "meta_result-statistical_metric", - "meta_result-statistical_notes", - "meta_result-n", - "meta_result-estimate", - "meta_result-lower_ci", - "meta_result-upper_ci", - "meta_result-ci_units", - "meta_result-heterogeneity", - "meta_result-adjustment_factors", - "meta_result-notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["label"], - ser["data_location"], - ser["health_outcome"], - ser["health_outcome_notes"], - ser["exposure_name"], - ser["exposure_details"], - ser["number_studies"], - ser["metric"]["metric"], - ser["statistical_notes"], - ser["n"], - ser["estimate"], - ser["lower_ci"], - ser["upper_ci"], - ser["ci_units"], - ser["heterogeneity"], - "|".join(ser["adjustment_factors"]), - ser["notes"], - ) - def get_study(self): if self.protocol is not None: return self.protocol.get_study() @@ -317,42 +233,6 @@ def estimate_formatted(self): txt += f" ({self.lower_ci}, {self.upper_ci})" return txt - @staticmethod - def flat_complete_header_row(): - return ( - "single_result-pk", - "single_result-study", - "single_result-exposure_name", - "single_result-weight", - "single_result-n", - "single_result-estimate", - "single_result-lower_ci", - "single_result-upper_ci", - "single_result-ci_units", - "single_result-notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - study = None - try: - study = ser["study"]["id"] - except TypeError: - pass - - return ( - ser["id"], - study, - ser["exposure_name"], - ser["weight"], - ser["n"], - ser["estimate"], - ser["lower_ci"], - ser["upper_ci"], - ser["ci_units"], - ser["notes"], - ) - def get_study(self): if self.meta_result is not None: return self.meta_result.get_study() diff --git a/hawc/apps/invitro/api.py b/hawc/apps/invitro/api.py index 3af8fe3bfc..6bb08db082 100644 --- a/hawc/apps/invitro/api.py +++ b/hawc/apps/invitro/api.py @@ -43,7 +43,7 @@ def full_export(self, request, pk): 
ser.is_valid(raise_exception=True) self.object_list = self.get_endpoint_queryset(request) exporter = exports.DataPivotEndpoint( - self.object_list, filename=f"{self.assessment}-invitro" + self.object_list, filename=f"{self.assessment}-invitro", assessment=self.assessment ) return Response(exporter.build_export()) diff --git a/hawc/apps/invitro/exports.py b/hawc/apps/invitro/exports.py index e05004de2b..6f4c137e4c 100644 --- a/hawc/apps/invitro/exports.py +++ b/hawc/apps/invitro/exports.py @@ -1,362 +1,560 @@ -from copy import copy +import math -from django.apps import apps +import pandas as pd +from django.db.models import Exists, OuterRef -from ..common.helper import FlatFileExporter -from ..materialized.models import FinalRiskOfBiasScore -from ..study.models import Study +from ..common.exports import Exporter, ModelExport +from ..common.helper import FlatFileExporter, df_move_column +from ..common.models import sql_display, str_m2m +from ..materialized.exports import get_final_score_df +from ..study.exports import StudyExport +from . 
import constants, models -def getDose(ser, tag): - if ser[tag] != -999: - return ser["groups"][ser[tag]]["dose"] - else: - return None +def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): + mean = low = high = None + + if mu_1 is not None and mu_2 is not None and mu_1 > 0 and mu_2 > 0: + mean = (mu_2 - mu_1) / mu_1 * 100.0 + if sd_1 and sd_2 and n_1 and n_2: + sd = math.sqrt( + pow(mu_1, -2) + * ((pow(sd_2, 2) / n_2) + (pow(mu_2, 2) * pow(sd_1, 2)) / (n_1 * pow(mu_1, 2))) + ) + ci = (1.96 * sd) * 100 + rng = sorted([mean - ci, mean + ci]) + low = rng[0] + high = rng[1] + + return mean, low, high -def getDoseRange(ser): - # get non-zero dose range - doses = [eg["dose"] for eg in ser["groups"] if eg["dose"] > 0] - if doses: - return min(doses), max(doses) - else: - return None, None +def assessment_categories(assessment_id: int) -> pd.DataFrame: + df = models.IVEndpointCategory.as_dataframe(assessment_id, include_root=False).set_index("id") + df2 = pd.DataFrame(df.nested_name.str.split("|").tolist(), index=df.index).fillna("-") + df2.columns = [f"Category {i}" for i in range(1, len(df2.columns) + 1)] + return df2 + + +def handle_categories(df: pd.DataFrame, assessment_id: int) -> pd.DataFrame: + category_df = assessment_categories(assessment_id) + df["iv_endpoint-category_id"] = df["iv_endpoint-category_id"].astype("Int64") + df2 = df.merge(category_df, left_on="iv_endpoint-category_id", right_index=True, how="left") + if "Category 1" in df2.columns: + df2 = df_move_column( + df2, "Category 1", "iv_endpoint-category_id", n_cols=category_df.shape[1] + ) + return df2.drop(columns=["iv_endpoint-category_id"]) + + +class IVChemicalExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "name", + "cas": "cas", + "dtxsid_id": "dtxsid_id", + "purity": "purity", + } + + +class IVExperimentExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "dose_units": "dose_units__name", + "metabolic_activation": 
"metabolic_activation_display", + "transfection": "transfection", + } + + def get_annotation_map(self, query_prefix): + return { + "metabolic_activation_display": sql_display( + query_prefix + "metabolic_activation", constants.MetabolicActivation + ), + } + + +class IVCellTypeExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "species": "species", + "strain": "strain", + "sex": "sex_display", + "cell_type": "cell_type", + "tissue": "tissue", + } + + def get_annotation_map(self, query_prefix): + return { + "sex_display": sql_display(query_prefix + "sex", constants.Sex), + } + + +class IVEndpointExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "name", + "data_type": "data_type", + "category_id": "category_id", + "variance_type": "variance_type", + "effects": "effects__name", + "assay_type": "assay_type", + "short_description": "short_description", + "response_units": "response_units", + "observation_time": "observation_time", + "observation_time_units": "observation_time_units_display", + "NOEL": "NOEL", + "LOEL": "LOEL", + "monotonicity": "monotonicity_display", + "overall_pattern": "overall_pattern_display", + "trend_test": "trend_test_display", + } + + def get_annotation_map(self, query_prefix): + return { + "effects__name": str_m2m(query_prefix + "effects__name"), + "observation_time_units_display": sql_display( + query_prefix + "observation_time_units", constants.ObservationTimeUnits + ), + "monotonicity_display": sql_display( + query_prefix + "monotonicity", constants.Monotonicity + ), + "overall_pattern_display": sql_display( + query_prefix + "overall_pattern", constants.OverallPattern + ), + "trend_test_display": sql_display( + query_prefix + "trend_test", constants.TrendTestResult + ), + } + + +class IVEndpointGroupExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "dose_group_id": "dose_group_id", + "dose": "dose", + "n": "n", + "response": "response", + "variance": 
"variance", + "difference_control": "difference_control", + "difference_control_display": "difference_control_display", + "significant_control": "significant_control_display", + "cytotoxicity_observed": "cytotoxicity_observed_display", + "precipitation_observed": "precipitation_observed_display", + } + + def get_annotation_map(self, query_prefix): + Observation = type("Observation", (object,), {"choices": constants.OBSERVATION_CHOICES}) + return { + "difference_control_display": sql_display( + query_prefix + "difference_control", constants.DifferenceControl + ), + "significant_control_display": sql_display( + query_prefix + "significant_control", constants.Significance + ), + "cytotoxicity_observed_display": sql_display( + query_prefix + "cytotoxicity_observed", Observation + ), + "precipitation_observed_display": sql_display( + query_prefix + "precipitation_observed", Observation + ), + } + + +class IVBenchmarkExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "benchmark": "benchmark", + "value": "value", + } + + +class InvitroExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "experiment__study", + include=( + "id", + "hero_id", + "pubmed_id", + "doi", + "short_citation", + "study_identifier", + "published", + ), + ), + IVChemicalExport( + "iv_chemical", + "chemical", + ), + IVExperimentExport( + "iv_experiment", + "experiment", + ), + IVCellTypeExport( + "iv_cell_type", + "experiment__cell_type", + ), + IVEndpointExport( + "iv_endpoint", + "", + exclude=( + "data_type", + "variance_type", + ), + ), + IVEndpointGroupExport( + "iv_endpoint_group", + "groups", + include=( + "id", + "dose", + "difference_control_display", + "significant_control", + "cytotoxicity_observed", + ), + ), + IVBenchmarkExport( + "iv_benchmark", + "benchmarks", + ), + ] class DataPivotEndpoint(FlatFileExporter): - def _get_header_row(self): - if self.queryset.first() is None: - self.rob_headers, self.rob_data = 
{}, {} - else: - study_ids = set(self.queryset.values_list("experiment__study_id", flat=True)) - self.rob_headers, self.rob_data = FinalRiskOfBiasScore.get_dp_export( - self.queryset.first().assessment_id, - study_ids, - "invitro", + def handle_dose_groups(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(group_df: pd.DataFrame) -> pd.Series: + # handle case with no dose groups + if group_df["iv_endpoint_group-id"].isna().all(): + group_df["number of doses"] = 0 + group_df["minimum dose"] = None + group_df["maximum dose"] = None + group_df["iv_endpoint-NOEL"] = None + group_df["iv_endpoint-LOEL"] = None + return group_df + # only interested in unique, non-control dose groups + unique_df = group_df.drop_duplicates(subset="iv_endpoint_group-id") + non_control_df = unique_df.loc[unique_df["iv_endpoint_group-dose"] > 0] + # add dose related columns + group_df["number of doses"] = non_control_df.shape[0] + group_df["minimum dose"] = non_control_df["iv_endpoint_group-dose"].min() + group_df["maximum dose"] = non_control_df["iv_endpoint_group-dose"].max() + NOEL_index = unique_df.iloc[0]["iv_endpoint-NOEL"] + group_df["iv_endpoint-NOEL"] = ( + None if NOEL_index == -999 else unique_df.iloc[NOEL_index]["iv_endpoint_group-dose"] + ) + LOEL_index = unique_df.iloc[0]["iv_endpoint-LOEL"] + group_df["iv_endpoint-LOEL"] = ( + None if LOEL_index == -999 else unique_df.iloc[LOEL_index]["iv_endpoint_group-dose"] ) + for i, row in enumerate(non_control_df.itertuples(index=False, name=None), start=1): + group_df[f"Dose {i}"] = row[ + non_control_df.columns.get_loc("iv_endpoint_group-dose") + ] + group_df[f"Change Control {i}"] = row[ + non_control_df.columns.get_loc("iv_endpoint_group-difference_control_display") + ] + group_df[f"Significant {i}"] = row[ + non_control_df.columns.get_loc("iv_endpoint_group-significant_control") + ] + group_df[f"Cytotoxicity {i}"] = row[ + non_control_df.columns.get_loc("iv_endpoint_group-cytotoxicity_observed") + ] - header = [ - "study id", - 
"study hero_id", - "study pubmed_id", - "study doi", - "study name", - "study identifier", - "study published", - ] + # return a df that is dose group agnostic + return group_df.drop_duplicates( + subset=group_df.columns[group_df.columns.str.endswith("-id")].difference( + ["iv_endpoint_group-id"] + ) + ) - header.extend(list(self.rob_headers.values())) - - header.extend( - [ - "chemical id", - "chemical name", - "chemical CAS", - "chemical DTXSID", - "chemical purity", - "IVExperiment id", - "IVCellType id", - "cell species", - "cell strain", - "cell sex", - "cell type", - "cell tissue", - "Dose units", - "Metabolic activation", - "Transfection", - "key", - "IVEndpoint id", - "IVEndpoint name", - "IVEndpoint description tags", - "assay type", - "endpoint description", - "endpoint response units", - "observation time", - "observation time units", - "NOEL", - "LOEL", - "monotonicity", - "overall pattern", - "trend test result", - "minimum dose", - "maximum dose", - "number of doses", - ] + return ( + df.groupby("iv_endpoint-id", group_keys=False) + .apply(_func) + .drop( + columns=[ + "iv_endpoint_group-id", + "iv_endpoint_group-dose", + "iv_endpoint_group-difference_control_display", + "iv_endpoint_group-significant_control", + "iv_endpoint_group-cytotoxicity_observed", + ] + ) + .reset_index(drop=True) ) - num_cats = 0 - if self.queryset.count() > 0: - IVEndpointCategory = apps.get_model("invitro", "IVEndpointCategory") - num_cats = IVEndpointCategory.get_maximum_depth(self.queryset[0].assessment_id) - header.extend([f"Category {i}" for i in range(1, num_cats + 1)]) - - num_doses = self.queryset.model.max_dose_count(self.queryset) - rng = range(1, num_doses + 1) - header.extend([f"Dose {i}" for i in rng]) - header.extend([f"Change Control {i}" for i in rng]) - header.extend([f"Significant {i}" for i in rng]) - header.extend([f"Cytotoxicity {i}" for i in rng]) - - num_bms = self.queryset.model.max_benchmark_count(self.queryset) - rng = range(1, num_bms + 1) - 
header.extend([f"Benchmark Type {i}" for i in rng]) - header.extend([f"Benchmark Value {i}" for i in rng]) - - self.num_cats = num_cats - self.num_doses = num_doses - self.num_bms = num_bms - - return header - - def _get_data_rows(self): - rows = [] - - identifiers_df = Study.identifiers_df(self.queryset, "experiment__study_id") - - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - - doseRange = getDoseRange(ser) - - cats = ser["category"]["names"] if ser["category"] else [] - - doses = [eg["dose"] for eg in ser["groups"]] - diffs = [eg["difference_control"] for eg in ser["groups"]] - sigs = [eg["significant_control"] for eg in ser["groups"]] - cytotoxes = [eg["cytotoxicity_observed"] for eg in ser["groups"]] - - if doses and doses[0] == 0: - doses.pop(0) - diffs.pop(0) - sigs.pop(0) - - number_doses = len(doses) - - bm_types = [bm["benchmark"] for bm in ser["benchmarks"]] - bm_values = [bm["value"] for bm in ser["benchmarks"]] - - study_id = ser["experiment"]["study"]["id"] - row = [ - study_id, - identifiers_df["hero_id"].get(study_id), - identifiers_df["pubmed_id"].get(study_id), - identifiers_df["doi"].get(study_id), - ser["experiment"]["study"]["short_citation"], - ser["experiment"]["study"]["study_identifier"], - ser["experiment"]["study"]["published"], - ] - - study_robs = [ - self.rob_data[(study_id, metric_id)] for metric_id in self.rob_headers.keys() - ] - row.extend(study_robs) - - row.extend( - [ - ser["chemical"]["id"], - ser["chemical"]["name"], - ser["chemical"]["cas"], - ser["chemical"]["dtxsid"], - ser["chemical"]["purity"], - ser["experiment"]["id"], - ser["experiment"]["cell_type"]["id"], - ser["experiment"]["cell_type"]["species"], - ser["experiment"]["cell_type"]["strain"], - ser["experiment"]["cell_type"]["sex"], - ser["experiment"]["cell_type"]["cell_type"], - ser["experiment"]["cell_type"]["tissue"], - ser["experiment"]["dose_units"]["name"], - ser["experiment"]["metabolic_activation"], - ser["experiment"]["transfection"], 
- ser["id"], # repeat for data-pivot key - ser["id"], - ser["name"], - "|".join([d["name"] for d in ser["effects"]]), - ser["assay_type"], - ser["short_description"], - ser["response_units"], - ser["observation_time"], - ser["observation_time_units"], - getDose(ser, "NOEL"), - getDose(ser, "LOEL"), - ser["monotonicity"], - ser["overall_pattern"], - ser["trend_test"], - doseRange[0], - doseRange[1], - number_doses, + def handle_benchmarks(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(group_df: pd.DataFrame) -> pd.DataFrame: + # handle case with no benchmarks + if group_df["iv_benchmark-id"].isna().all(): + # no need to deduplicate, since there should be + # only one benchmark id: None + return group_df + # only interested in unique benchmarks + unique_df = group_df.drop_duplicates(subset="iv_benchmark-id") + # add the benchmark columns + for i, row in enumerate(unique_df.itertuples(index=False, name=None), start=1): + group_df[f"Benchmark Type {i}"] = row[ + unique_df.columns.get_loc("iv_benchmark-benchmark") ] + group_df[f"Benchmark Value {i}"] = row[ + unique_df.columns.get_loc("iv_benchmark-value") + ] + # return a df that is benchmark agnostic + return group_df.drop_duplicates( + subset=group_df.columns[group_df.columns.str.endswith("-id")].difference( + ["iv_benchmark-id"] + ) ) - # extend rows to include blank placeholders, and apply - cats.extend([None] * (self.num_cats - len(cats))) - doses.extend([None] * (self.num_doses - len(doses))) - diffs.extend([None] * (self.num_doses - len(diffs))) - sigs.extend([None] * (self.num_doses - len(sigs))) - cytotoxes.extend([None] * (self.num_doses - len(cytotoxes))) + return ( + df.groupby("iv_endpoint-id", group_keys=False) + .apply(_func) + .drop(columns=["iv_benchmark-id", "iv_benchmark-benchmark", "iv_benchmark-value"]) + .reset_index(drop=True) + ) - bm_types.extend([None] * (self.num_bms - len(bm_types))) - bm_values.extend([None] * (self.num_bms - len(bm_values))) + def build_df(self) -> pd.DataFrame: + df 
= InvitroExporter().get_df( + self.queryset.select_related( + "experiment__study", "chemical__dtxsid", "experiment__cell_type" + ) + .prefetch_related("groups", "benchmarks") + .order_by("id", "groups", "benchmarks") + ) + if obj := self.queryset.first(): + study_ids = list(df["study-id"].unique()) + rob_df = get_final_score_df(obj.assessment_id, study_ids, "invitro") + df = df.join(rob_df, on="study-id") + + df["key"] = df["iv_endpoint-id"] + + df = self.handle_dose_groups(df) + df = self.handle_benchmarks(df) + df = handle_categories(df, self.kwargs["assessment"].id) + + df = df.rename( + columns={ + "study-id": "study id", + "study-hero_id": "study hero_id", + "study-pubmed_id": "study pubmed_id", + "study-doi": "study doi", + "study-short_citation": "study name", + "study-study_identifier": "study identifier", + "study-published": "study published", + } + ) + df = df.rename( + columns={ + "iv_chemical-id": "chemical id", + "iv_chemical-name": "chemical name", + "iv_chemical-cas": "chemical CAS", + "iv_chemical-dtxsid_id": "chemical DTXSID", + "iv_chemical-purity": "chemical purity", + "iv_experiment-id": "IVExperiment id", + "iv_experiment-dose_units": "Dose units", + "iv_experiment-metabolic_activation": "Metabolic activation", + "iv_experiment-transfection": "Transfection", + "iv_cell_type-id": "IVCellType id", + "iv_cell_type-species": "cell species", + "iv_cell_type-strain": "cell strain", + "iv_cell_type-sex": "cell sex", + "iv_cell_type-cell_type": "cell type", + "iv_cell_type-tissue": "cell tissue", + "iv_endpoint-id": "IVEndpoint id", + "iv_endpoint-name": "IVEndpoint name", + "iv_endpoint-effects": "IVEndpoint description tags", + "iv_endpoint-assay_type": "assay type", + "iv_endpoint-short_description": "endpoint description", + "iv_endpoint-response_units": "endpoint response units", + "iv_endpoint-observation_time": "observation time", + "iv_endpoint-observation_time_units": "observation time units", + "iv_endpoint-NOEL": "NOEL", + 
"iv_endpoint-LOEL": "LOEL", + "iv_endpoint-monotonicity": "monotonicity", + "iv_endpoint-overall_pattern": "overall pattern", + "iv_endpoint-trend_test": "trend test result", + } + ) - row.extend(cats) - row.extend(doses) - row.extend(diffs) - row.extend(sigs) - row.extend(cytotoxes) - row.extend(bm_types) - row.extend(bm_values) + return df + + +class InvitroGroupExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "experiment__study", + include=("id", "short_citation", "study_identifier", "published"), + ), + IVChemicalExport( + "iv_chemical", + "chemical", + ), + IVExperimentExport( + "iv_experiment", + "experiment", + ), + IVCellTypeExport( + "iv_cell_type", + "experiment__cell_type", + ), + IVEndpointExport( + "iv_endpoint", + "", + ), + IVEndpointGroupExport( + "iv_endpoint_group", "groups", exclude=("difference_control_display",) + ), + ] - rows.append(row) - return rows +class DataPivotEndpointGroup(FlatFileExporter): + def handle_stdev(self, df: pd.DataFrame) -> pd.DataFrame: + df["stdev"] = df.apply( + lambda x: models.IVEndpointGroup.stdev( + x["iv_endpoint-variance_type"], + x["iv_endpoint_group-variance"], + x["iv_endpoint_group-n"], + ), + axis="columns", + ) + return df.drop(columns=["iv_endpoint-variance_type", "iv_endpoint_group-variance"]) + def handle_dose_groups(self, df: pd.DataFrame) -> pd.DataFrame: + def _func(group_df: pd.DataFrame) -> pd.DataFrame: + control = group_df.iloc[0] -class DataPivotEndpointGroup(FlatFileExporter): - def _get_header_row(self): - if self.queryset.first() is None: - self.rob_headers, self.rob_data = {}, {} - else: - study_ids = set(self.queryset.values_list("experiment__study_id", flat=True)) - self.rob_headers, self.rob_data = FinalRiskOfBiasScore.get_dp_export( - self.queryset.first().assessment_id, - study_ids, - "invitro", - ) + group_df["low_dose"] = group_df["iv_endpoint_group-dose"].loc[lambda x: x > 0].min() + group_df["high_dose"] = 
group_df["iv_endpoint_group-dose"].loc[lambda x: x > 0].max() - header = [ - "study id", - "study name", - "study identifier", - "study published", - ] + group_df["iv_endpoint-NOEL"] = ( + None + if control["iv_endpoint-NOEL"] == -999 + else group_df.iloc[control["iv_endpoint-NOEL"]]["iv_endpoint_group-dose"] + ) + group_df["iv_endpoint-LOEL"] = ( + None + if control["iv_endpoint-LOEL"] == -999 + else group_df.iloc[control["iv_endpoint-LOEL"]]["iv_endpoint_group-dose"] + ) - header.extend(list(self.rob_headers.values())) - - header.extend( - [ - "chemical id", - "chemical name", - "chemical CAS", - "chemical DTXSID", - "chemical purity", - "IVExperiment id", - "IVCellType id", - "cell species", - "cell strain", - "cell sex", - "cell type", - "cell tissue", - "dose units", - "metabolic activation", - "transfection", - "IVEndpoint id", - "IVEndpoint name", - "IVEndpoint description tags", - "assay type", - "endpoint description", - "endpoint response units", - "observation time", - "observation time units", - "low_dose", - "NOEL", - "LOEL", - "high_dose", - "monotonicity", - "overall pattern", - "trend test result", - "key", - "dose index", - "dose", - "N", - "response", - "stdev", - "percent control mean", - "percent control low", - "percent control high", - "change from control", - "significant from control", - "cytotoxicity observed", - "precipitation observed", - ] + data_type = control["iv_endpoint-data_type"] + n_1 = control["iv_endpoint_group-n"] + mu_1 = control["iv_endpoint_group-response"] + sd_1 = control["stdev"] + + def __func(row: pd.Series) -> pd.Series: + # logic used from IVEndpointGroup.percentControl() + if data_type == constants.DataType.CONTINUOUS: + n_2 = row["iv_endpoint_group-n"] + mu_2 = row["iv_endpoint_group-response"] + sd_2 = row["stdev"] + ( + row["percent control mean"], + row["percent control low"], + row["percent control high"], + ) = percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2) + elif data_type == constants.DataType.DICHOTOMOUS: 
+ # TODO this seems to be a dead conditional; + # invitro has no 'incidence' variables so + # nothing is ever computed here + pass + return row + + return group_df.apply(__func, axis="columns") + + return ( + df.groupby("iv_endpoint-id", group_keys=False) + .apply(_func) + .drop(columns="iv_endpoint-data_type") + .reset_index(drop=True) ) - return header - - def _get_data_rows(self): - rows = [] - - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - - doseRange = getDoseRange(ser) - - row = [ - ser["experiment"]["study"]["id"], - ser["experiment"]["study"]["short_citation"], - ser["experiment"]["study"]["study_identifier"], - ser["experiment"]["study"]["published"], - ] - - study_id = ser["experiment"]["study"]["id"] - study_robs = [ - self.rob_data[(study_id, metric_id)] for metric_id in self.rob_headers.keys() - ] - row.extend(study_robs) - - row.extend( - [ - ser["chemical"]["id"], - ser["chemical"]["name"], - ser["chemical"]["cas"], - ser["chemical"]["dtxsid"], - ser["chemical"]["purity"], - ser["experiment"]["id"], - ser["experiment"]["cell_type"]["id"], - ser["experiment"]["cell_type"]["species"], - ser["experiment"]["cell_type"]["strain"], - ser["experiment"]["cell_type"]["sex"], - ser["experiment"]["cell_type"]["cell_type"], - ser["experiment"]["cell_type"]["tissue"], - ser["experiment"]["dose_units"]["name"], - ser["experiment"]["metabolic_activation"], - ser["experiment"]["transfection"], - ser["id"], - ser["name"], - "|".join([d["name"] for d in ser["effects"]]), - ser["assay_type"], - ser["short_description"], - ser["response_units"], - ser["observation_time"], - ser["observation_time_units"], - doseRange[0], - getDose(ser, "NOEL"), - getDose(ser, "LOEL"), - doseRange[1], - ser["monotonicity"], - ser["overall_pattern"], - ser["trend_test"], - ] + + def build_df(self) -> pd.DataFrame: + df = InvitroGroupExporter().get_df( + self.queryset.select_related( + "experiment__study", "chemical__dtxsid", "experiment__cell_type" ) + 
.prefetch_related("groups") + .filter(Exists(models.IVEndpointGroup.objects.filter(endpoint=OuterRef("pk")))) + .order_by("id", "groups") + ) + if obj := self.queryset.first(): + study_ids = list(df["study-id"].unique()) + rob_df = get_final_score_df(obj.assessment_id, study_ids, "invitro") + df = df.join(rob_df, on="study-id") - # endpoint-group information - for i, eg in enumerate(ser["groups"]): - row_copy = copy(row) - row_copy.extend( - [ - eg["id"], - eg["dose_group_id"], - eg["dose"], - eg["n"], - eg["response"], - eg["stdev"], - eg["percentControlMean"], - eg["percentControlLow"], - eg["percentControlHigh"], - eg["difference_control_symbol"], - eg["significant_control"], - eg["cytotoxicity_observed"], - eg["precipitation_observed"], - ] - ) - rows.append(row_copy) + df["key"] = df["iv_endpoint_group-id"] + df = df.drop(columns=["iv_endpoint_group-id"]) + + df = self.handle_stdev(df) + df = self.handle_dose_groups(df) + df = handle_categories(df, self.kwargs["assessment"].id) + + df["iv_endpoint_group-difference_control"] = df["iv_endpoint_group-difference_control"].map( + models.IVEndpointGroup.DIFFERENCE_CONTROL_SYMBOLS + ) + + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": "study name", + "study-study_identifier": "study identifier", + "study-published": "study published", + } + ) + df = df.rename( + columns={ + "iv_chemical-id": "chemical id", + "iv_chemical-name": "chemical name", + "iv_chemical-cas": "chemical CAS", + "iv_chemical-dtxsid_id": "chemical DTXSID", + "iv_chemical-purity": "chemical purity", + "iv_experiment-id": "IVExperiment id", + "iv_experiment-dose_units": "dose units", + "iv_experiment-metabolic_activation": "metabolic activation", + "iv_experiment-transfection": "transfection", + "iv_cell_type-id": "IVCellType id", + "iv_cell_type-species": "cell species", + "iv_cell_type-strain": "cell strain", + "iv_cell_type-sex": "cell sex", + "iv_cell_type-cell_type": "cell type", + "iv_cell_type-tissue": "cell 
tissue", + "iv_endpoint-id": "IVEndpoint id", + "iv_endpoint-name": "IVEndpoint name", + "iv_endpoint-effects": "IVEndpoint description tags", + "iv_endpoint-assay_type": "assay type", + "iv_endpoint-short_description": "endpoint description", + "iv_endpoint-response_units": "endpoint response units", + "iv_endpoint-observation_time": "observation time", + "iv_endpoint-observation_time_units": "observation time units", + "iv_endpoint-NOEL": "NOEL", + "iv_endpoint-LOEL": "LOEL", + "iv_endpoint-monotonicity": "monotonicity", + "iv_endpoint-overall_pattern": "overall pattern", + "iv_endpoint-trend_test": "trend test result", + } + ) + df = df.rename( + columns={ + "iv_endpoint_group-dose_group_id": "dose index", + "iv_endpoint_group-dose": "dose", + "iv_endpoint_group-n": "N", + "iv_endpoint_group-response": "response", + "iv_endpoint_group-difference_control": "change from control", + "iv_endpoint_group-significant_control": "significant from control", + "iv_endpoint_group-cytotoxicity_observed": "cytotoxicity observed", + "iv_endpoint_group-precipitation_observed": "precipitation observed", + } + ) - return rows + return df diff --git a/hawc/apps/materialized/exports.py b/hawc/apps/materialized/exports.py new file mode 100644 index 0000000000..52cd2f86b4 --- /dev/null +++ b/hawc/apps/materialized/exports.py @@ -0,0 +1,13 @@ +import pandas as pd + +from ..common.helper import unique_text_list +from .models import FinalRiskOfBiasScore + + +def get_final_score_df(assessment_id: int, ids: list[int], model: str) -> pd.DataFrame: + rob_headers, rob_data = FinalRiskOfBiasScore.get_dp_export(assessment_id, ids, model) + return pd.DataFrame( + data=[[rob_data[(id, metric_id)] for metric_id in rob_headers.keys()] for id in ids], + columns=unique_text_list(list(rob_headers.values())), + index=ids, + ) diff --git a/hawc/apps/riskofbias/api.py b/hawc/apps/riskofbias/api.py index 4951931d90..93c45bf9de 100644 --- a/hawc/apps/riskofbias/api.py +++ b/hawc/apps/riskofbias/api.py @@ 
-52,13 +52,18 @@ def export(self, request, pk): ser.is_valid(raise_exception=True) published_only = get_published_only(self.assessment, request) rob_name = self.assessment.get_rob_name_display().lower() - exporter = exports.RiskOfBiasFlat( - self.get_queryset().none(), - filename=f"{self.assessment}-{rob_name}", - assessment_id=self.assessment.id, - published_only=published_only, + qs = ( + models.RiskOfBiasScore.objects.filter( + riskofbias__active=True, + riskofbias__final=True, + riskofbias__study__assessment=self.assessment, + ) + .published_only(published_only) + .order_by("riskofbias__study__short_citation", "riskofbias_id", "id") ) - return Response(exporter.build_export()) + filename = f"{self.assessment}-{rob_name}" + exporter = exports.RiskOfBiasExporter.flat_export(qs, filename) + return Response(exporter) @action( detail=True, @@ -75,14 +80,17 @@ def full_export(self, request, pk): ser.is_valid(raise_exception=True) published_only = get_published_only(self.assessment, request) rob_name = self.assessment.get_rob_name_display().lower() - exporter = exports.RiskOfBiasCompleteFlat( - self.get_queryset().none(), - filename=f"{self.assessment}-{rob_name}-complete", - assessment_id=self.assessment.id, - published_only=published_only, + qs = ( + models.RiskOfBiasScore.objects.filter( + riskofbias__active=True, + riskofbias__study__assessment=self.assessment, + ) + .published_only(published_only) + .order_by("riskofbias__study__short_citation", "riskofbias_id", "id") ) - - return Response(exporter.build_export()) + filename = f"{self.assessment}-{rob_name}-complete" + exporter = exports.RiskOfBiasCompleteExporter.flat_export(qs, filename) + return Response(exporter) @action(detail=False, methods=("post",), permission_classes=(IsAuthenticated,)) def bulk_rob_copy(self, request): diff --git a/hawc/apps/riskofbias/data/exports/RiskOfBiasCompleteFlatSchema.tsv b/hawc/apps/riskofbias/data/exports/RiskOfBiasCompleteFlatSchema.tsv index a5c223c08a..9c2c495903 100644 
--- a/hawc/apps/riskofbias/data/exports/RiskOfBiasCompleteFlatSchema.tsv +++ b/hawc/apps/riskofbias/data/exports/RiskOfBiasCompleteFlatSchema.tsv @@ -27,16 +27,16 @@ rob-author_id The study evaluation/risk of bias author HAWC identifier rob-author_name The study evaluation/risk of bias author name rob-created The date the review was created rob-last_updated The date the review was last updated -rob-domain_id Study evaluation/risk of bias domain identifier -rob-domain_name Study evaluation/risk of bias domain name -rob-domain_description Study evaluation/risk of bias domain description -rob-metric_id Study evaluation/risk of bias metric identifier -rob-metric_name Study evaluation/risk of bias metric name -rob-metric_description Study evaluation/risk of bias metric description -rob-score_id Study evaluation/risk of bias metric response id for a unique study/metric pair -rob-score_is_default If multiple responses exist for a study/metric pair, should this one be treated as the default response -rob-score_label If multiple responses exist for a study/metric pair, a label for this response -rob-score_score If a qualitative judgment is made, an integer representation of the judgment value -rob-score_description If a qualitative judgment is made, a text label of the judgment value -rob-score_bias_direction Expected direction of bias, if used. 
0= (not entered/unknown), 1=⬆ (away from null), 2=⬇ (towards null) -rob-score_notes Reviewer notes to describe the study evaluation for a study/metric pair +rob_domain-id Study evaluation/risk of bias domain identifier +rob_domain-name Study evaluation/risk of bias domain name +rob_domain-description Study evaluation/risk of bias domain description +rob_metric-id Study evaluation/risk of bias metric identifier +rob_metric-name Study evaluation/risk of bias metric name +rob_metric-description Study evaluation/risk of bias metric description +rob_score-id Study evaluation/risk of bias metric response id for a unique study/metric pair +rob_score-is_default If multiple responses exist for a study/metric pair, should this one be treated as the default response +rob_score-label If multiple responses exist for a study/metric pair, a label for this response +rob_score-score If a qualitative judgment is made, an integer representation of the judgment value +rob_score-description If a qualitative judgment is made, a text label of the judgment value +rob_score-bias_direction Expected direction of bias, if used. 
0= (not entered/unknown), 1=⬆ (away from null), 2=⬇ (towards null) +rob_score-notes Reviewer notes to describe the study evaluation for a study/metric pair diff --git a/hawc/apps/riskofbias/data/exports/RiskOfBiasFlatSchema.tsv b/hawc/apps/riskofbias/data/exports/RiskOfBiasFlatSchema.tsv index 4b4c780f59..7f965771b6 100644 --- a/hawc/apps/riskofbias/data/exports/RiskOfBiasFlatSchema.tsv +++ b/hawc/apps/riskofbias/data/exports/RiskOfBiasFlatSchema.tsv @@ -23,16 +23,16 @@ study-published If True, this study, study evaluation, and extraction details ma rob-id Study evaluation/risk of bias review unique identifier rob-created The date the review was created rob-last_updated The date the review was last updated -rob-domain_id Study evaluation/risk of bias domain identifier -rob-domain_name Study evaluation/risk of bias domain name -rob-domain_description Study evaluation/risk of bias domain description -rob-metric_id Study evaluation/risk of bias metric identifier -rob-metric_name Study evaluation/risk of bias metric name -rob-metric_description Study evaluation/risk of bias metric description -rob-score_id Study evaluation/risk of bias metric response id for a unique study/metric pair -rob-score_is_default If multiple responses exist for a study/metric pair, should this one be treated as the default response -rob-score_label If multiple responses exist for a study/metric pair, a label for this response -rob-score_score If a qualitative judgment is made, an integer representation of the judgment value -rob-score_description If a qualitative judgment is made, a text label of the judgment value -rob-score_bias_direction Expected direction of bias, if used. 
0= (not entered/unknown), 1=⬆ (away from null), 2=⬇ (towards null) -rob-score_notes Reviewer notes to describe the study evaluation for a study/metric pair +rob_domain-id Study evaluation/risk of bias domain identifier +rob_domain-name Study evaluation/risk of bias domain name +rob_domain-description Study evaluation/risk of bias domain description +rob_metric-id Study evaluation/risk of bias metric identifier +rob_metric-name Study evaluation/risk of bias metric name +rob_metric-description Study evaluation/risk of bias metric description +rob_score-id Study evaluation/risk of bias metric response id for a unique study/metric pair +rob_score-is_default If multiple responses exist for a study/metric pair, should this one be treated as the default response +rob_score-label If multiple responses exist for a study/metric pair, a label for this response +rob_score-score If a qualitative judgment is made, an integer representation of the judgment value +rob_score-description If a qualitative judgment is made, a text label of the judgment value +rob_score-bias_direction Expected direction of bias, if used. 0= (not entered/unknown), 1=⬆ (away from null), 2=⬇ (towards null) +rob_score-notes Reviewer notes to describe the study evaluation for a study/metric pair diff --git a/hawc/apps/riskofbias/exports.py b/hawc/apps/riskofbias/exports.py index 8eb2c4ff8e..bba97ad943 100644 --- a/hawc/apps/riskofbias/exports.py +++ b/hawc/apps/riskofbias/exports.py @@ -1,95 +1,104 @@ import pandas as pd from django.conf import settings - -from ..common.helper import FlatFileExporter -from ..study.models import Study -from ..study.serializers import VerboseStudySerializer -from . import models, serializers - - -class RiskOfBiasFlat(FlatFileExporter): - """ - Returns a complete export of active Final Risk of Bias reviews, without - reviewer information. 
- """ - - final_only = True # only return final data - - def get_serialized_data(self): - assessment_id = self.kwargs["assessment_id"] - published_only = self.kwargs.get("published_only", True) - qs = ( - Study.objects.filter(assessment_id=assessment_id) - .prefetch_related("identifiers", "riskofbiases__scores__overridden_objects") - .select_related("assessment") - .published_only(published_only) - ) - ser = VerboseStudySerializer(qs, many=True) - study_data = ser.data - - if not self.final_only: - qs = ( - models.RiskOfBias.objects.filter(study__assessment_id=assessment_id, active=True) - .select_related("author") - .prefetch_related("scores__overridden_objects") - .published_only(published_only) - ) - ser = serializers.RiskOfBiasSerializer(qs, many=True) - rob_data = ser.data - for study in study_data: - study["riskofbiases"] = [rob for rob in rob_data if rob["study"] == study["id"]] - - return study_data - - def _get_header_row(self): - header = [] - header.extend(Study.flat_complete_header_row()) - header.extend(models.RiskOfBias.flat_header_row(final_only=self.final_only)) - header.extend(models.RiskOfBiasScore.flat_complete_header_row()) - return header - - def _get_data_rows(self): - rows = [] - for ser in self.get_serialized_data(): - domains = ser["rob_settings"]["domains"] - metrics = ser["rob_settings"]["metrics"] - domain_map = {domain["id"]: domain for domain in domains} - metric_map = { - metric["id"]: dict(metric, domain=domain_map[metric["domain_id"]]) - for metric in metrics - } - - row1 = [] - row1.extend(Study.flat_complete_data_row(ser)) - - robs = [rob for rob in ser.get("riskofbiases", [])] - if self.final_only: - robs = [rob for rob in robs if rob["final"] and rob["active"]] - - for rob in robs: - row2 = list(row1) - row2.extend(models.RiskOfBias.flat_data_row(rob, final_only=self.final_only)) - for score in rob["scores"]: - row3 = list(row2) - score["metric"] = metric_map[score["metric_id"]] - 
row3.extend(models.RiskOfBiasScore.flat_complete_data_row(score)) - rows.append(row3) - - return rows - - def build_metadata(self) -> pd.DataFrame | None: +from django.db.models import Value + +from ..common.exports import Exporter, ModelExport +from ..common.helper import cleanHTML +from ..common.models import sql_format +from ..study.exports import StudyExport +from . import constants + + +class RiskOfBiasExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "active": "active", + "final": "final", + "author_id": "author_id", + "author_name": "author_full_name", + "created": "created", + "last_updated": "last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "author_full_name": sql_format( + "{} {}", query_prefix + "author__first_name", query_prefix + "author__last_name" + ), + } + + def prepare_df(self, df): + return self.format_time(df) + + +class DomainExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "name", + "description": "description", + } + + +class MetricExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "name", + "description": "description", + } + + +class RiskOfBiasScoreExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "is_default": "is_default", + "label": "label", + "score": "score", + "description": Value("?"), + "bias_direction": "bias_direction", + "notes": "notes", + } + + def prepare_df(self, df: pd.DataFrame) -> pd.DataFrame: + if (key := self.get_column_name("description")) in df.columns: + df.loc[:, key] = df[self.get_column_name("score")].map(constants.SCORE_CHOICES_MAP) + if (key := self.get_column_name("notes")) in df.columns: + df.loc[:, key] = df[key].apply(cleanHTML) + return df + + +class RiskOfBiasExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport("study", "riskofbias__study"), + RiskOfBiasExport( + "rob", "riskofbias", exclude=("active", "final", 
"author_id", "author_name") + ), + DomainExport("rob_domain", "metric__domain"), + MetricExport("rob_metric", "metric"), + RiskOfBiasScoreExport("rob_score", ""), + ] + + @classmethod + def build_metadata(cls, df: pd.DataFrame) -> pd.DataFrame | None: fn = settings.PROJECT_PATH / "apps/riskofbias/data/exports/RiskOfBiasFlatSchema.tsv" return pd.read_csv(fn, delimiter="\t") -class RiskOfBiasCompleteFlat(RiskOfBiasFlat): - """ - Returns a complete export of all Risk of Bias reviews including reviewer - information. - """ - - final_only = False +class RiskOfBiasCompleteExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport("study", "riskofbias__study"), + RiskOfBiasExport("rob", "riskofbias"), + DomainExport("rob_domain", "metric__domain"), + MetricExport("rob_metric", "metric"), + RiskOfBiasScoreExport("rob_score", ""), + ] - def build_metadata(self) -> pd.DataFrame | None: + @classmethod + def build_metadata(cls, df: pd.DataFrame) -> pd.DataFrame | None: fn = settings.PROJECT_PATH / "apps/riskofbias/data/exports/RiskOfBiasCompleteFlatSchema.tsv" return pd.read_csv(fn, delimiter="\t") diff --git a/hawc/apps/riskofbias/managers.py b/hawc/apps/riskofbias/managers.py index 1838e3b4f2..8404581b7c 100644 --- a/hawc/apps/riskofbias/managers.py +++ b/hawc/apps/riskofbias/managers.py @@ -165,6 +165,9 @@ def data_pivot_json(self): score_json=Cast("score_json_tmp", output_field=CharField()), ) + def published_only(self, published_only: bool): + return self.filter(riskofbias__study__published=True) if published_only else self + class RiskOfBiasScoreManager(BaseManager): assessment_relation = "riskofbias__study__assessment" diff --git a/hawc/apps/riskofbias/models.py b/hawc/apps/riskofbias/models.py index 9b6d2227c6..47fa02b4ef 100644 --- a/hawc/apps/riskofbias/models.py +++ b/hawc/apps/riskofbias/models.py @@ -14,7 +14,7 @@ from reversion import revisions as reversion from ..assessment.models import Assessment -from ..common.helper import 
HAWCDjangoJSONEncoder, SerializerHelper, cleanHTML +from ..common.helper import HAWCDjangoJSONEncoder, SerializerHelper from ..myuser.models import HAWCUser from ..study.models import Study from . import constants, managers @@ -268,25 +268,6 @@ def study_reviews_complete(self): def delete_caches(cls, ids): SerializerHelper.delete_caches(cls, ids) - @staticmethod - def flat_header_row(final_only: bool = True): - col = ["rob-id", "rob-created", "rob-last_updated"] - if not final_only: - col[1:1] = ["rob-active", "rob-final", "rob-author_id", "rob-author_name"] - return col - - @staticmethod - def flat_data_row(ser, final_only: bool = True): - row = [ser["id"], ser["created"], ser["last_updated"]] - if not final_only: - row[1:1] = [ - ser["active"], - ser["final"], - ser["author"]["id"], - ser["author"]["full_name"], - ] - return row - def get_override_options(self) -> dict: """Get risk of bias override options and overrides @@ -391,42 +372,6 @@ def save(self, *args, **kwargs): def get_assessment(self): return self.metric.get_assessment() - @staticmethod - def flat_complete_header_row(): - return ( - "rob-domain_id", - "rob-domain_name", - "rob-domain_description", - "rob-metric_id", - "rob-metric_name", - "rob-metric_description", - "rob-score_id", - "rob-score_is_default", - "rob-score_label", - "rob-score_score", - "rob-score_description", - "rob-score_bias_direction", - "rob-score_notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["metric"]["domain"]["id"], - ser["metric"]["domain"]["name"], - ser["metric"]["domain"]["description"], - ser["metric"]["id"], - ser["metric"]["name"], - ser["metric"]["description"], - ser["id"], - ser["is_default"], - ser["label"], - ser["score"], - ser["score_description"], - ser["bias_direction"], - cleanHTML(ser["notes"]), - ) - @property def score_symbol(self): return constants.SCORE_SYMBOLS[self.score] diff --git a/hawc/apps/study/exports.py b/hawc/apps/study/exports.py index 3e6f1a4a8b..523206e47f 
100644 --- a/hawc/apps/study/exports.py +++ b/hawc/apps/study/exports.py @@ -3,6 +3,7 @@ from django.db.models import Q from ..common.exports import ModelExport +from ..common.helper import cleanHTML from ..common.models import sql_display, sql_format, str_m2m from ..lit.constants import ReferenceDatabase from .constants import CoiReported @@ -53,8 +54,18 @@ def get_annotation_map(self, query_prefix): } def prepare_df(self, df): + # cast from string to nullable int for key in [self.get_column_name("pubmed_id"), self.get_column_name("hero_id")]: - df[key] = pd.to_numeric(df[key], errors="coerce") - for key in [self.get_column_name("doi")]: - df[key] = df[key].replace("", np.nan) + if key in df.columns: + df[key] = pd.to_numeric(df[key], errors="coerce") + + # cast from string to null + doi = self.get_column_name("doi") + if doi in df.columns: + df[doi] = df[doi].replace("", np.nan) + + # clean html text + summary = self.get_column_name("summary") + if summary in df.columns: + df.loc[:, summary] = df[summary].apply(cleanHTML) return df diff --git a/hawc/apps/study/models.py b/hawc/apps/study/models.py index 2cdde04064..f562e96c4c 100644 --- a/hawc/apps/study/models.py +++ b/hawc/apps/study/models.py @@ -10,7 +10,7 @@ from reversion import revisions as reversion from ..assessment.models import Communication -from ..common.helper import SerializerHelper, cleanHTML +from ..common.helper import SerializerHelper from ..lit.models import Reference from . 
import constants, managers @@ -197,65 +197,6 @@ def get_study_types(self) -> list[str]: types.append(field) return types - @staticmethod - def flat_complete_header_row(): - return ( - "study-id", - "study-hero_id", - "study-pubmed_id", - "study-doi", - "study-url", - "study-short_citation", - "study-full_citation", - "study-coi_reported", - "study-coi_details", - "study-funding_source", - "study-bioassay", - "study-epi", - "study-epi_meta", - "study-in_vitro", - "study-eco", - "study-study_identifier", - "study-contact_author", - "study-ask_author", - "study-summary", - "study-editable", - "study-published", - ) - - @staticmethod - def flat_complete_data_row(ser, identifiers_df: pd.DataFrame | None = None) -> tuple: - try: - ident_row = ( - identifiers_df.loc[ser["id"]] if isinstance(identifiers_df, pd.DataFrame) else None - ) - except KeyError: - ident_row = None - return ( - ser["id"], - # IDs can come from identifiers data frame if exists, else check study serializer - ident_row.hero_id if ident_row is not None else ser.get("hero_id", None), - ident_row.pubmed_id if ident_row is not None else ser.get("pubmed_id", None), - ident_row.doi if ident_row is not None else ser.get("doi", None), - ser["url"], - ser["short_citation"], - ser["full_citation"], - ser["coi_reported"], - ser["coi_details"], - ser["funding_source"], - ser["bioassay"], - ser["epi"], - ser["epi_meta"], - ser["in_vitro"], - ser["eco"], - ser["study_identifier"], - ser["contact_author"], - ser["ask_author"], - cleanHTML(ser["summary"]), - ser["editable"], - ser["published"], - ) - @classmethod def identifiers_df(cls, qs: models.QuerySet, relation: str) -> pd.DataFrame: """Returns a data frame with reference identifiers for each study in the QuerySet diff --git a/hawc/apps/summary/admin.py b/hawc/apps/summary/admin.py index a384ffece6..0f8ecfd38d 100644 --- a/hawc/apps/summary/admin.py +++ b/hawc/apps/summary/admin.py @@ -29,7 +29,6 @@ def show_url(self, obj): return format_html(f"{obj.id}") 
-@admin.register(models.DataPivotUpload, models.DataPivotQuery) class DataPivotAdmin(admin.ModelAdmin): list_display = ( "title", @@ -40,7 +39,7 @@ class DataPivotAdmin(admin.ModelAdmin): "created", "last_updated", ) - list_filter = ("published", ("assessment", admin.RelatedOnlyFieldListFilter)) + list_filter = ("published", ("evidence_type", admin.RelatedOnlyFieldListFilter)) search_fields = ("assessment__name", "title") @admin.display(description="URL") @@ -48,6 +47,10 @@ def show_url(self, obj): return format_html(f"{obj.id}") +class DataPivotQueryAdmin(DataPivotAdmin): + list_filter = ("published", "evidence_type") + + @admin.register(models.SummaryText) class SummaryTextAdmin(TreeAdmin): list_display = ( @@ -69,3 +72,7 @@ class SummaryTableAdmin(VersionAdmin): ) list_filter = ("table_type", "published", ("assessment", admin.RelatedOnlyFieldListFilter)) + + +admin.site.register(models.DataPivotUpload, DataPivotAdmin) +admin.site.register(models.DataPivotQuery, DataPivotQueryAdmin) diff --git a/hawc/main/urls.py b/hawc/main/urls.py index 63df45031f..d332ca489e 100644 --- a/hawc/main/urls.py +++ b/hawc/main/urls.py @@ -74,6 +74,7 @@ path("epi-meta/api/", include(hawc.apps.epimeta.urls.router.urls)), path("epidemiology/api/", include(hawc.apps.epiv2.urls.router.urls)), path("in-vitro/api/", include(hawc.apps.invitro.urls.router.urls)), + path("udf/api/", include(hawc.apps.udf.urls.router.urls)), path("lit/api/", include(hawc.apps.lit.urls.router.urls)), path("mgmt/api/", include(hawc.apps.mgmt.urls.router.urls)), path("rob/api/", include(hawc.apps.riskofbias.urls.router.urls)), diff --git a/tests/conftest.py b/tests/conftest.py index d969633d24..07e1a0f570 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -98,7 +98,6 @@ def vcr_cassette_dir(request): @pytest.fixture -@pytest.mark.django_db def pm_user(): return get_user_model().objects.get(email="pm@hawcproject.org") diff --git a/tests/data/api/api-dp-data-animal-bioassay-endpoint.json 
b/tests/data/api/api-dp-data-animal-bioassay-endpoint.json index 21bd0caf45..e0a9949615 100644 --- a/tests/data/api/api-dp-data-animal-bioassay-endpoint.json +++ b/tests/data/api/api-dp-data-animal-bioassay-endpoint.json @@ -58,7 +58,7 @@ "study name": "Biesemeier JA et al. 2011", "study published": true, "system": "developmental", - "tags": "|tag1|", + "tags": "tag1", "treatment period": "developmental (3 days)", "trend test result": "significant", "trend test value": 0.04 @@ -122,7 +122,7 @@ "study name": "Biesemeier JA et al. 2011", "study published": true, "system": "", - "tags": "||", + "tags": "", "treatment period": "developmental (3 days)", "trend test result": "not reported", "trend test value": null @@ -186,7 +186,7 @@ "study name": "Biesemeier JA et al. 2011", "study published": true, "system": "Cardiovascular", - "tags": "||", + "tags": "", "treatment period": "developmental (3 days)", "trend test result": "not reported", "trend test value": null @@ -250,7 +250,7 @@ "study name": "Biesemeier JA et al. 2011", "study published": true, "system": "", - "tags": "||", + "tags": "", "treatment period": "developmental (3 days)", "trend test result": "not reported", "trend test value": null @@ -314,7 +314,7 @@ "study name": "Biesemeier JA et al. 
2011", "study published": true, "system": "", - "tags": "||", + "tags": "", "treatment period": "developmental (3 days)", "trend test result": "not reported", "trend test value": null diff --git a/tests/data/api/api-dp-data-epi.json b/tests/data/api/api-dp-data-epi.json index 0e90856514..e30adfc574 100644 --- a/tests/data/api/api-dp-data-epi.json +++ b/tests/data/api/api-dp-data-epi.json @@ -52,7 +52,7 @@ "result name": "partial PTSD", "result population description": "", "result summary": "", - "result tags": "|tag2|", + "result tags": "tag2", "statistical metric": "other", "statistical metric abbreviation": "oth", "statistical metric description": "count", @@ -68,7 +68,7 @@ "study population name": "Tokyo subway victims", "study population source": "", "study published": true, - "tags": "|tag2|", + "tags": "tag2", "upper CI": null, "upper bound interval": null, "upper range": null, @@ -127,7 +127,7 @@ "result name": "partial PTSD", "result population description": "", "result summary": "", - "result tags": "|tag2|", + "result tags": "tag2", "statistical metric": "other", "statistical metric abbreviation": "oth", "statistical metric description": "count", @@ -143,7 +143,7 @@ "study population name": "Tokyo subway victims", "study population source": "", "study published": true, - "tags": "|tag2|", + "tags": "tag2", "upper CI": null, "upper bound interval": null, "upper range": null, @@ -202,7 +202,7 @@ "result name": "partial PTSD", "result population description": "", "result summary": "", - "result tags": "|tag2|", + "result tags": "tag2", "statistical metric": "other", "statistical metric abbreviation": "oth", "statistical metric description": "count", @@ -218,7 +218,7 @@ "study population name": "Tokyo subway victims", "study population source": "", "study published": true, - "tags": "|tag2|", + "tags": "tag2", "upper CI": null, "upper bound interval": null, "upper range": null, diff --git a/tests/data/api/api-dp-data-invitro-endpoint-group.json 
b/tests/data/api/api-dp-data-invitro-endpoint-group.json index b37ad7df0d..214776fe15 100644 --- a/tests/data/api/api-dp-data-invitro-endpoint-group.json +++ b/tests/data/api/api-dp-data-invitro-endpoint-group.json @@ -152,4 +152,4 @@ "transfection": "-", "trend test result": "not significant" } -] \ No newline at end of file +] diff --git a/tests/data/api/api-dp-data-invitro-endpoint.json b/tests/data/api/api-dp-data-invitro-endpoint.json index 2583bb41c4..7481c5bdec 100644 --- a/tests/data/api/api-dp-data-invitro-endpoint.json +++ b/tests/data/api/api-dp-data-invitro-endpoint.json @@ -56,4 +56,4 @@ "study pubmed_id": 15907334, "trend test result": "not significant" } -] \ No newline at end of file +] diff --git a/tests/data/api/api-epi-assessment-export-unpublished-False.json b/tests/data/api/api-epi-assessment-export-unpublished-False.json index 5efd59de66..d905d935c1 100644 --- a/tests/data/api/api-epi-assessment-export-unpublished-False.json +++ b/tests/data/api/api-epi-assessment-export-unpublished-False.json @@ -1,14 +1,14 @@ [ { - "cs-created": "2020-05-10T22:14:03.887387-04:00", + "cs-created": "2020-05-10T22:14:03.887387-0400", "cs-description": "sarin released at five points in the Tokyo subway systems", "cs-id": 1, - "cs-last_updated": "2020-05-10T22:14:17.353405-04:00", + "cs-last_updated": "2020-05-10T22:14:17.353405-0400", "cs-name": "Tokyo subway victims (different groups)", "cs-url": "/epi/comparison-set/1/", "exposure-age_of_exposure": "", "exposure-analytical_method": "NA", - "exposure-created": "2020-05-10T22:11:48.360112-04:00", + "exposure-created": "2020-05-10T22:11:48.360112-0400", "exposure-dermal": false, "exposure-description": "", "exposure-duration": "", @@ -17,7 +17,7 @@ "exposure-in_utero": false, "exposure-inhalation": true, "exposure-iv": false, - "exposure-last_updated": "2020-05-10T22:11:48.360136-04:00", + "exposure-last_updated": "2020-05-10T22:11:48.360136-0400", "exposure-measured": "Sarin", "exposure-metric": "air", 
"exposure-metric_description": "sarin released at five points in the Tokyo subway systems", @@ -31,14 +31,14 @@ "exposure-url": "/epi/exposure/1/", "group-comments": "", "group-comparative_name": "", - "group-created": "2020-05-10T22:14:04.009669-04:00", + "group-created": "2020-05-10T22:14:04.009669-0400", "group-eligible_n": 582, "group-ethnicities": "Asian", "group-group_id": 0, "group-id": 1, "group-invited_n": null, "group-isControl": null, - "group-last_updated": "2020-05-10T22:14:17.432631-04:00", + "group-last_updated": "2020-05-10T22:14:17.432631-0400", "group-name": "Tokyo (St. Luke) respondents 1997", "group-numeric": null, "group-participant_n": 283, @@ -47,14 +47,14 @@ "metric-id": 2, "metric-name": "other", "outcome-age_of_measurement": "", - "outcome-created": "2020-05-10T22:21:56.870317-04:00", + "outcome-created": "2020-05-10T22:21:56.870317-0400", "outcome-diagnostic": "other", "outcome-diagnostic_description": "NR", "outcome-effect": "neurological: behavior", "outcome-effect_subtype": "", "outcome-effects": "tag2", "outcome-id": 4, - "outcome-last_updated": "2020-05-10T22:21:56.870345-04:00", + "outcome-last_updated": "2020-05-10T22:21:56.870345-0400", "outcome-name": "partial PTSD", "outcome-outcome_n": null, "outcome-summary": "", @@ -64,13 +64,13 @@ "result-adjustment_factors_considered": "", "result-ci_units": 0.95, "result-comments": "", - "result-created": "2020-05-10T22:23:49.490734-04:00", + "result-created": "2020-05-10T22:23:49.490734-0400", "result-data_location": "Table 2", "result-dose_response": "not-applicable", "result-dose_response_details": "", "result-estimate_type": "---", "result-id": 1, - "result-last_updated": "2020-05-10T22:25:38.225671-04:00", + "result-last_updated": "2020-05-10T22:25:38.225671-0400", "result-metric_description": "count", "result-metric_units": "#", "result-name": "partial PTSD", @@ -81,11 +81,11 @@ "result-statistical_test_results": "", "result-trend_test": "", "result-variance_type": "---", - 
"result_group-created": "2020-05-10T22:23:49.737387-04:00", + "result_group-created": "2020-05-10T22:23:49.737387-0400", "result_group-estimate": 20.0, "result_group-id": 1, "result_group-is_main_finding": false, - "result_group-last_updated": "2020-05-10T22:23:49.737414-04:00", + "result_group-last_updated": "2020-05-10T22:23:49.737414-0400", "result_group-lower_bound_interval": null, "result_group-lower_ci": null, "result_group-lower_range": null, @@ -101,14 +101,14 @@ "sp-comments": "Descriptions.", "sp-confounding_criteria": "have an exposure timing during the periconceptional period or during pregnancy", "sp-countries": "Japan", - "sp-created": "2020-05-10T22:09:54.288594-04:00", + "sp-created": "2020-05-10T22:09:54.288594-0400", "sp-design": "Case series", "sp-eligible_n": null, "sp-exclusion_criteria": "include an evaluation of the direct association between folic acid exposure and one of the outcomes of interest", "sp-id": 1, "sp-inclusion_criteria": "\"heterogeneity of exposure and outcome\" excluded 9 of the 14 studies that met the inclusion criteria", "sp-invited_n": null, - "sp-last_updated": "2020-05-10T22:09:54.288617-04:00", + "sp-last_updated": "2020-05-10T22:09:54.288617-0400", "sp-name": "Tokyo subway victims", "sp-participant_n": 582, "sp-region": "Tokyo", @@ -138,15 +138,15 @@ "study-url": "/study/5/" }, { - "cs-created": "2020-05-10T22:14:03.887387-04:00", + "cs-created": "2020-05-10T22:14:03.887387-0400", "cs-description": "sarin released at five points in the Tokyo subway systems", "cs-id": 1, - "cs-last_updated": "2020-05-10T22:14:17.353405-04:00", + "cs-last_updated": "2020-05-10T22:14:17.353405-0400", "cs-name": "Tokyo subway victims (different groups)", "cs-url": "/epi/comparison-set/1/", "exposure-age_of_exposure": "", "exposure-analytical_method": "NA", - "exposure-created": "2020-05-10T22:11:48.360112-04:00", + "exposure-created": "2020-05-10T22:11:48.360112-0400", "exposure-dermal": false, "exposure-description": "", 
"exposure-duration": "", @@ -155,7 +155,7 @@ "exposure-in_utero": false, "exposure-inhalation": true, "exposure-iv": false, - "exposure-last_updated": "2020-05-10T22:11:48.360136-04:00", + "exposure-last_updated": "2020-05-10T22:11:48.360136-0400", "exposure-measured": "Sarin", "exposure-metric": "air", "exposure-metric_description": "sarin released at five points in the Tokyo subway systems", @@ -169,14 +169,14 @@ "exposure-url": "/epi/exposure/1/", "group-comments": "", "group-comparative_name": "", - "group-created": "2020-05-10T22:14:04.247759-04:00", + "group-created": "2020-05-10T22:14:04.247759-0400", "group-eligible_n": 582, "group-ethnicities": "Asian", "group-group_id": 1, "group-id": 2, "group-invited_n": null, "group-isControl": null, - "group-last_updated": "2020-05-10T22:14:17.591544-04:00", + "group-last_updated": "2020-05-10T22:14:17.591544-0400", "group-name": "Tokyo (St. Luke) respondents 1998", "group-numeric": null, "group-participant_n": 206, @@ -185,14 +185,14 @@ "metric-id": 2, "metric-name": "other", "outcome-age_of_measurement": "", - "outcome-created": "2020-05-10T22:21:56.870317-04:00", + "outcome-created": "2020-05-10T22:21:56.870317-0400", "outcome-diagnostic": "other", "outcome-diagnostic_description": "NR", "outcome-effect": "neurological: behavior", "outcome-effect_subtype": "", "outcome-effects": "tag2", "outcome-id": 4, - "outcome-last_updated": "2020-05-10T22:21:56.870345-04:00", + "outcome-last_updated": "2020-05-10T22:21:56.870345-0400", "outcome-name": "partial PTSD", "outcome-outcome_n": null, "outcome-summary": "", @@ -202,13 +202,13 @@ "result-adjustment_factors_considered": "", "result-ci_units": 0.95, "result-comments": "", - "result-created": "2020-05-10T22:23:49.490734-04:00", + "result-created": "2020-05-10T22:23:49.490734-0400", "result-data_location": "Table 2", "result-dose_response": "not-applicable", "result-dose_response_details": "", "result-estimate_type": "---", "result-id": 1, - "result-last_updated": 
"2020-05-10T22:25:38.225671-04:00", + "result-last_updated": "2020-05-10T22:25:38.225671-0400", "result-metric_description": "count", "result-metric_units": "#", "result-name": "partial PTSD", @@ -219,11 +219,11 @@ "result-statistical_test_results": "", "result-trend_test": "", "result-variance_type": "---", - "result_group-created": "2020-05-10T22:23:49.806018-04:00", + "result_group-created": "2020-05-10T22:23:49.806018-0400", "result_group-estimate": 15.0, "result_group-id": 2, "result_group-is_main_finding": false, - "result_group-last_updated": "2020-05-10T22:23:49.806044-04:00", + "result_group-last_updated": "2020-05-10T22:23:49.806044-0400", "result_group-lower_bound_interval": null, "result_group-lower_ci": null, "result_group-lower_range": null, @@ -239,14 +239,14 @@ "sp-comments": "Descriptions.", "sp-confounding_criteria": "have an exposure timing during the periconceptional period or during pregnancy", "sp-countries": "Japan", - "sp-created": "2020-05-10T22:09:54.288594-04:00", + "sp-created": "2020-05-10T22:09:54.288594-0400", "sp-design": "Case series", "sp-eligible_n": null, "sp-exclusion_criteria": "include an evaluation of the direct association between folic acid exposure and one of the outcomes of interest", "sp-id": 1, "sp-inclusion_criteria": "\"heterogeneity of exposure and outcome\" excluded 9 of the 14 studies that met the inclusion criteria", "sp-invited_n": null, - "sp-last_updated": "2020-05-10T22:09:54.288617-04:00", + "sp-last_updated": "2020-05-10T22:09:54.288617-0400", "sp-name": "Tokyo subway victims", "sp-participant_n": 582, "sp-region": "Tokyo", @@ -276,15 +276,15 @@ "study-url": "/study/5/" }, { - "cs-created": "2020-05-10T22:14:03.887387-04:00", + "cs-created": "2020-05-10T22:14:03.887387-0400", "cs-description": "sarin released at five points in the Tokyo subway systems", "cs-id": 1, - "cs-last_updated": "2020-05-10T22:14:17.353405-04:00", + "cs-last_updated": "2020-05-10T22:14:17.353405-0400", "cs-name": "Tokyo subway victims 
(different groups)", "cs-url": "/epi/comparison-set/1/", "exposure-age_of_exposure": "", "exposure-analytical_method": "NA", - "exposure-created": "2020-05-10T22:11:48.360112-04:00", + "exposure-created": "2020-05-10T22:11:48.360112-0400", "exposure-dermal": false, "exposure-description": "", "exposure-duration": "", @@ -293,7 +293,7 @@ "exposure-in_utero": false, "exposure-inhalation": true, "exposure-iv": false, - "exposure-last_updated": "2020-05-10T22:11:48.360136-04:00", + "exposure-last_updated": "2020-05-10T22:11:48.360136-0400", "exposure-measured": "Sarin", "exposure-metric": "air", "exposure-metric_description": "sarin released at five points in the Tokyo subway systems", @@ -307,14 +307,14 @@ "exposure-url": "/epi/exposure/1/", "group-comments": "", "group-comparative_name": "", - "group-created": "2020-05-10T22:14:04.403130-04:00", + "group-created": "2020-05-10T22:14:04.403130-0400", "group-eligible_n": 582, "group-ethnicities": "Asian", "group-group_id": 2, "group-id": 3, "group-invited_n": null, "group-isControl": null, - "group-last_updated": "2020-05-10T22:14:17.746856-04:00", + "group-last_updated": "2020-05-10T22:14:17.746856-0400", "group-name": "Tokyo (St. 
Luke) respondents 2000", "group-numeric": null, "group-participant_n": 191, @@ -323,14 +323,14 @@ "metric-id": 2, "metric-name": "other", "outcome-age_of_measurement": "", - "outcome-created": "2020-05-10T22:21:56.870317-04:00", + "outcome-created": "2020-05-10T22:21:56.870317-0400", "outcome-diagnostic": "other", "outcome-diagnostic_description": "NR", "outcome-effect": "neurological: behavior", "outcome-effect_subtype": "", "outcome-effects": "tag2", "outcome-id": 4, - "outcome-last_updated": "2020-05-10T22:21:56.870345-04:00", + "outcome-last_updated": "2020-05-10T22:21:56.870345-0400", "outcome-name": "partial PTSD", "outcome-outcome_n": null, "outcome-summary": "", @@ -340,13 +340,13 @@ "result-adjustment_factors_considered": "", "result-ci_units": 0.95, "result-comments": "", - "result-created": "2020-05-10T22:23:49.490734-04:00", + "result-created": "2020-05-10T22:23:49.490734-0400", "result-data_location": "Table 2", "result-dose_response": "not-applicable", "result-dose_response_details": "", "result-estimate_type": "---", "result-id": 1, - "result-last_updated": "2020-05-10T22:25:38.225671-04:00", + "result-last_updated": "2020-05-10T22:25:38.225671-0400", "result-metric_description": "count", "result-metric_units": "#", "result-name": "partial PTSD", @@ -357,11 +357,11 @@ "result-statistical_test_results": "", "result-trend_test": "", "result-variance_type": "---", - "result_group-created": "2020-05-10T22:23:49.859670-04:00", + "result_group-created": "2020-05-10T22:23:49.859670-0400", "result_group-estimate": 16.0, "result_group-id": 3, "result_group-is_main_finding": false, - "result_group-last_updated": "2020-05-10T22:23:49.859696-04:00", + "result_group-last_updated": "2020-05-10T22:23:49.859696-0400", "result_group-lower_bound_interval": null, "result_group-lower_ci": null, "result_group-lower_range": null, @@ -377,14 +377,14 @@ "sp-comments": "Descriptions.", "sp-confounding_criteria": "have an exposure timing during the periconceptional period or 
during pregnancy", "sp-countries": "Japan", - "sp-created": "2020-05-10T22:09:54.288594-04:00", + "sp-created": "2020-05-10T22:09:54.288594-0400", "sp-design": "Case series", "sp-eligible_n": null, "sp-exclusion_criteria": "include an evaluation of the direct association between folic acid exposure and one of the outcomes of interest", "sp-id": 1, "sp-inclusion_criteria": "\"heterogeneity of exposure and outcome\" excluded 9 of the 14 studies that met the inclusion criteria", "sp-invited_n": null, - "sp-last_updated": "2020-05-10T22:09:54.288617-04:00", + "sp-last_updated": "2020-05-10T22:09:54.288617-0400", "sp-name": "Tokyo subway victims", "sp-participant_n": 582, "sp-region": "Tokyo", diff --git a/tests/data/api/api-epi-assessment-export-unpublished-True.json b/tests/data/api/api-epi-assessment-export-unpublished-True.json index 5efd59de66..d905d935c1 100644 --- a/tests/data/api/api-epi-assessment-export-unpublished-True.json +++ b/tests/data/api/api-epi-assessment-export-unpublished-True.json @@ -1,14 +1,14 @@ [ { - "cs-created": "2020-05-10T22:14:03.887387-04:00", + "cs-created": "2020-05-10T22:14:03.887387-0400", "cs-description": "sarin released at five points in the Tokyo subway systems", "cs-id": 1, - "cs-last_updated": "2020-05-10T22:14:17.353405-04:00", + "cs-last_updated": "2020-05-10T22:14:17.353405-0400", "cs-name": "Tokyo subway victims (different groups)", "cs-url": "/epi/comparison-set/1/", "exposure-age_of_exposure": "", "exposure-analytical_method": "NA", - "exposure-created": "2020-05-10T22:11:48.360112-04:00", + "exposure-created": "2020-05-10T22:11:48.360112-0400", "exposure-dermal": false, "exposure-description": "", "exposure-duration": "", @@ -17,7 +17,7 @@ "exposure-in_utero": false, "exposure-inhalation": true, "exposure-iv": false, - "exposure-last_updated": "2020-05-10T22:11:48.360136-04:00", + "exposure-last_updated": "2020-05-10T22:11:48.360136-0400", "exposure-measured": "Sarin", "exposure-metric": "air", 
"exposure-metric_description": "sarin released at five points in the Tokyo subway systems", @@ -31,14 +31,14 @@ "exposure-url": "/epi/exposure/1/", "group-comments": "", "group-comparative_name": "", - "group-created": "2020-05-10T22:14:04.009669-04:00", + "group-created": "2020-05-10T22:14:04.009669-0400", "group-eligible_n": 582, "group-ethnicities": "Asian", "group-group_id": 0, "group-id": 1, "group-invited_n": null, "group-isControl": null, - "group-last_updated": "2020-05-10T22:14:17.432631-04:00", + "group-last_updated": "2020-05-10T22:14:17.432631-0400", "group-name": "Tokyo (St. Luke) respondents 1997", "group-numeric": null, "group-participant_n": 283, @@ -47,14 +47,14 @@ "metric-id": 2, "metric-name": "other", "outcome-age_of_measurement": "", - "outcome-created": "2020-05-10T22:21:56.870317-04:00", + "outcome-created": "2020-05-10T22:21:56.870317-0400", "outcome-diagnostic": "other", "outcome-diagnostic_description": "NR", "outcome-effect": "neurological: behavior", "outcome-effect_subtype": "", "outcome-effects": "tag2", "outcome-id": 4, - "outcome-last_updated": "2020-05-10T22:21:56.870345-04:00", + "outcome-last_updated": "2020-05-10T22:21:56.870345-0400", "outcome-name": "partial PTSD", "outcome-outcome_n": null, "outcome-summary": "", @@ -64,13 +64,13 @@ "result-adjustment_factors_considered": "", "result-ci_units": 0.95, "result-comments": "", - "result-created": "2020-05-10T22:23:49.490734-04:00", + "result-created": "2020-05-10T22:23:49.490734-0400", "result-data_location": "Table 2", "result-dose_response": "not-applicable", "result-dose_response_details": "", "result-estimate_type": "---", "result-id": 1, - "result-last_updated": "2020-05-10T22:25:38.225671-04:00", + "result-last_updated": "2020-05-10T22:25:38.225671-0400", "result-metric_description": "count", "result-metric_units": "#", "result-name": "partial PTSD", @@ -81,11 +81,11 @@ "result-statistical_test_results": "", "result-trend_test": "", "result-variance_type": "---", - 
"result_group-created": "2020-05-10T22:23:49.737387-04:00", + "result_group-created": "2020-05-10T22:23:49.737387-0400", "result_group-estimate": 20.0, "result_group-id": 1, "result_group-is_main_finding": false, - "result_group-last_updated": "2020-05-10T22:23:49.737414-04:00", + "result_group-last_updated": "2020-05-10T22:23:49.737414-0400", "result_group-lower_bound_interval": null, "result_group-lower_ci": null, "result_group-lower_range": null, @@ -101,14 +101,14 @@ "sp-comments": "Descriptions.", "sp-confounding_criteria": "have an exposure timing during the periconceptional period or during pregnancy", "sp-countries": "Japan", - "sp-created": "2020-05-10T22:09:54.288594-04:00", + "sp-created": "2020-05-10T22:09:54.288594-0400", "sp-design": "Case series", "sp-eligible_n": null, "sp-exclusion_criteria": "include an evaluation of the direct association between folic acid exposure and one of the outcomes of interest", "sp-id": 1, "sp-inclusion_criteria": "\"heterogeneity of exposure and outcome\" excluded 9 of the 14 studies that met the inclusion criteria", "sp-invited_n": null, - "sp-last_updated": "2020-05-10T22:09:54.288617-04:00", + "sp-last_updated": "2020-05-10T22:09:54.288617-0400", "sp-name": "Tokyo subway victims", "sp-participant_n": 582, "sp-region": "Tokyo", @@ -138,15 +138,15 @@ "study-url": "/study/5/" }, { - "cs-created": "2020-05-10T22:14:03.887387-04:00", + "cs-created": "2020-05-10T22:14:03.887387-0400", "cs-description": "sarin released at five points in the Tokyo subway systems", "cs-id": 1, - "cs-last_updated": "2020-05-10T22:14:17.353405-04:00", + "cs-last_updated": "2020-05-10T22:14:17.353405-0400", "cs-name": "Tokyo subway victims (different groups)", "cs-url": "/epi/comparison-set/1/", "exposure-age_of_exposure": "", "exposure-analytical_method": "NA", - "exposure-created": "2020-05-10T22:11:48.360112-04:00", + "exposure-created": "2020-05-10T22:11:48.360112-0400", "exposure-dermal": false, "exposure-description": "", 
"exposure-duration": "", @@ -155,7 +155,7 @@ "exposure-in_utero": false, "exposure-inhalation": true, "exposure-iv": false, - "exposure-last_updated": "2020-05-10T22:11:48.360136-04:00", + "exposure-last_updated": "2020-05-10T22:11:48.360136-0400", "exposure-measured": "Sarin", "exposure-metric": "air", "exposure-metric_description": "sarin released at five points in the Tokyo subway systems", @@ -169,14 +169,14 @@ "exposure-url": "/epi/exposure/1/", "group-comments": "", "group-comparative_name": "", - "group-created": "2020-05-10T22:14:04.247759-04:00", + "group-created": "2020-05-10T22:14:04.247759-0400", "group-eligible_n": 582, "group-ethnicities": "Asian", "group-group_id": 1, "group-id": 2, "group-invited_n": null, "group-isControl": null, - "group-last_updated": "2020-05-10T22:14:17.591544-04:00", + "group-last_updated": "2020-05-10T22:14:17.591544-0400", "group-name": "Tokyo (St. Luke) respondents 1998", "group-numeric": null, "group-participant_n": 206, @@ -185,14 +185,14 @@ "metric-id": 2, "metric-name": "other", "outcome-age_of_measurement": "", - "outcome-created": "2020-05-10T22:21:56.870317-04:00", + "outcome-created": "2020-05-10T22:21:56.870317-0400", "outcome-diagnostic": "other", "outcome-diagnostic_description": "NR", "outcome-effect": "neurological: behavior", "outcome-effect_subtype": "", "outcome-effects": "tag2", "outcome-id": 4, - "outcome-last_updated": "2020-05-10T22:21:56.870345-04:00", + "outcome-last_updated": "2020-05-10T22:21:56.870345-0400", "outcome-name": "partial PTSD", "outcome-outcome_n": null, "outcome-summary": "", @@ -202,13 +202,13 @@ "result-adjustment_factors_considered": "", "result-ci_units": 0.95, "result-comments": "", - "result-created": "2020-05-10T22:23:49.490734-04:00", + "result-created": "2020-05-10T22:23:49.490734-0400", "result-data_location": "Table 2", "result-dose_response": "not-applicable", "result-dose_response_details": "", "result-estimate_type": "---", "result-id": 1, - "result-last_updated": 
"2020-05-10T22:25:38.225671-04:00", + "result-last_updated": "2020-05-10T22:25:38.225671-0400", "result-metric_description": "count", "result-metric_units": "#", "result-name": "partial PTSD", @@ -219,11 +219,11 @@ "result-statistical_test_results": "", "result-trend_test": "", "result-variance_type": "---", - "result_group-created": "2020-05-10T22:23:49.806018-04:00", + "result_group-created": "2020-05-10T22:23:49.806018-0400", "result_group-estimate": 15.0, "result_group-id": 2, "result_group-is_main_finding": false, - "result_group-last_updated": "2020-05-10T22:23:49.806044-04:00", + "result_group-last_updated": "2020-05-10T22:23:49.806044-0400", "result_group-lower_bound_interval": null, "result_group-lower_ci": null, "result_group-lower_range": null, @@ -239,14 +239,14 @@ "sp-comments": "Descriptions.", "sp-confounding_criteria": "have an exposure timing during the periconceptional period or during pregnancy", "sp-countries": "Japan", - "sp-created": "2020-05-10T22:09:54.288594-04:00", + "sp-created": "2020-05-10T22:09:54.288594-0400", "sp-design": "Case series", "sp-eligible_n": null, "sp-exclusion_criteria": "include an evaluation of the direct association between folic acid exposure and one of the outcomes of interest", "sp-id": 1, "sp-inclusion_criteria": "\"heterogeneity of exposure and outcome\" excluded 9 of the 14 studies that met the inclusion criteria", "sp-invited_n": null, - "sp-last_updated": "2020-05-10T22:09:54.288617-04:00", + "sp-last_updated": "2020-05-10T22:09:54.288617-0400", "sp-name": "Tokyo subway victims", "sp-participant_n": 582, "sp-region": "Tokyo", @@ -276,15 +276,15 @@ "study-url": "/study/5/" }, { - "cs-created": "2020-05-10T22:14:03.887387-04:00", + "cs-created": "2020-05-10T22:14:03.887387-0400", "cs-description": "sarin released at five points in the Tokyo subway systems", "cs-id": 1, - "cs-last_updated": "2020-05-10T22:14:17.353405-04:00", + "cs-last_updated": "2020-05-10T22:14:17.353405-0400", "cs-name": "Tokyo subway victims 
(different groups)", "cs-url": "/epi/comparison-set/1/", "exposure-age_of_exposure": "", "exposure-analytical_method": "NA", - "exposure-created": "2020-05-10T22:11:48.360112-04:00", + "exposure-created": "2020-05-10T22:11:48.360112-0400", "exposure-dermal": false, "exposure-description": "", "exposure-duration": "", @@ -293,7 +293,7 @@ "exposure-in_utero": false, "exposure-inhalation": true, "exposure-iv": false, - "exposure-last_updated": "2020-05-10T22:11:48.360136-04:00", + "exposure-last_updated": "2020-05-10T22:11:48.360136-0400", "exposure-measured": "Sarin", "exposure-metric": "air", "exposure-metric_description": "sarin released at five points in the Tokyo subway systems", @@ -307,14 +307,14 @@ "exposure-url": "/epi/exposure/1/", "group-comments": "", "group-comparative_name": "", - "group-created": "2020-05-10T22:14:04.403130-04:00", + "group-created": "2020-05-10T22:14:04.403130-0400", "group-eligible_n": 582, "group-ethnicities": "Asian", "group-group_id": 2, "group-id": 3, "group-invited_n": null, "group-isControl": null, - "group-last_updated": "2020-05-10T22:14:17.746856-04:00", + "group-last_updated": "2020-05-10T22:14:17.746856-0400", "group-name": "Tokyo (St. 
Luke) respondents 2000", "group-numeric": null, "group-participant_n": 191, @@ -323,14 +323,14 @@ "metric-id": 2, "metric-name": "other", "outcome-age_of_measurement": "", - "outcome-created": "2020-05-10T22:21:56.870317-04:00", + "outcome-created": "2020-05-10T22:21:56.870317-0400", "outcome-diagnostic": "other", "outcome-diagnostic_description": "NR", "outcome-effect": "neurological: behavior", "outcome-effect_subtype": "", "outcome-effects": "tag2", "outcome-id": 4, - "outcome-last_updated": "2020-05-10T22:21:56.870345-04:00", + "outcome-last_updated": "2020-05-10T22:21:56.870345-0400", "outcome-name": "partial PTSD", "outcome-outcome_n": null, "outcome-summary": "", @@ -340,13 +340,13 @@ "result-adjustment_factors_considered": "", "result-ci_units": 0.95, "result-comments": "", - "result-created": "2020-05-10T22:23:49.490734-04:00", + "result-created": "2020-05-10T22:23:49.490734-0400", "result-data_location": "Table 2", "result-dose_response": "not-applicable", "result-dose_response_details": "", "result-estimate_type": "---", "result-id": 1, - "result-last_updated": "2020-05-10T22:25:38.225671-04:00", + "result-last_updated": "2020-05-10T22:25:38.225671-0400", "result-metric_description": "count", "result-metric_units": "#", "result-name": "partial PTSD", @@ -357,11 +357,11 @@ "result-statistical_test_results": "", "result-trend_test": "", "result-variance_type": "---", - "result_group-created": "2020-05-10T22:23:49.859670-04:00", + "result_group-created": "2020-05-10T22:23:49.859670-0400", "result_group-estimate": 16.0, "result_group-id": 3, "result_group-is_main_finding": false, - "result_group-last_updated": "2020-05-10T22:23:49.859696-04:00", + "result_group-last_updated": "2020-05-10T22:23:49.859696-0400", "result_group-lower_bound_interval": null, "result_group-lower_ci": null, "result_group-lower_range": null, @@ -377,14 +377,14 @@ "sp-comments": "Descriptions.", "sp-confounding_criteria": "have an exposure timing during the periconceptional period or 
during pregnancy", "sp-countries": "Japan", - "sp-created": "2020-05-10T22:09:54.288594-04:00", + "sp-created": "2020-05-10T22:09:54.288594-0400", "sp-design": "Case series", "sp-eligible_n": null, "sp-exclusion_criteria": "include an evaluation of the direct association between folic acid exposure and one of the outcomes of interest", "sp-id": 1, "sp-inclusion_criteria": "\"heterogeneity of exposure and outcome\" excluded 9 of the 14 studies that met the inclusion criteria", "sp-invited_n": null, - "sp-last_updated": "2020-05-10T22:09:54.288617-04:00", + "sp-last_updated": "2020-05-10T22:09:54.288617-0400", "sp-name": "Tokyo subway victims", "sp-participant_n": 582, "sp-region": "Tokyo", diff --git a/tests/data/api/api-invitro-assessment-full-export.json b/tests/data/api/api-invitro-assessment-full-export.json index 2583bb41c4..7481c5bdec 100644 --- a/tests/data/api/api-invitro-assessment-full-export.json +++ b/tests/data/api/api-invitro-assessment-full-export.json @@ -56,4 +56,4 @@ "study pubmed_id": 15907334, "trend test result": "not significant" } -] \ No newline at end of file +] diff --git a/tests/data/api/api-rob-assessment-export.json b/tests/data/api/api-rob-assessment-export.json index 8a3d462cc8..a0b9f6233c 100644 --- a/tests/data/api/api-rob-assessment-export.json +++ b/tests/data/api/api-rob-assessment-export.json @@ -1,21 +1,21 @@ [ { - "rob-created": "2020-05-08T13:56:46.183552-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2020-05-08T13:56:46.183552-0400", "rob-id": 6, - "rob-last_updated": "2020-05-08T15:34:31.468702-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Probably low risk of bias", - "rob-score_id": 14, - "rob-score_is_default": true, - "rob-score_label": "test1", - "rob-score_notes": "test", - "rob-score_score": 16, + "rob-last_updated": "2020-05-08T15:34:31.468702-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Probably low risk of bias", + "rob_score-id": 14, + "rob_score-is_default": true, + "rob_score-label": "test1", + "rob_score-notes": "test", + "rob_score-score": 16, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -39,22 +39,22 @@ "study-url": "/study/7/" }, { - "rob-created": "2020-05-08T13:56:46.183552-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2020-05-08T13:56:46.183552-0400", "rob-id": 6, - "rob-last_updated": "2020-05-08T15:34:31.468702-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Definitely high risk of bias", - "rob-score_id": 16, - "rob-score_is_default": false, - "rob-score_label": "test2", - "rob-score_notes": "beep", - "rob-score_score": 14, + "rob-last_updated": "2020-05-08T15:34:31.468702-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Probably low risk of bias", + "rob_score-id": 15, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "test", + "rob_score-score": 16, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -78,22 +78,22 @@ "study-url": "/study/7/" }, { - "rob-created": "2020-05-08T13:56:46.183552-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2020-05-08T13:56:46.183552-0400", "rob-id": 6, - "rob-last_updated": "2020-05-08T15:34:31.468702-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Probably low risk of bias", - "rob-score_id": 15, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "test", - "rob-score_score": 16, + "rob-last_updated": "2020-05-08T15:34:31.468702-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Definitely high risk of bias", + "rob_score-id": 16, + "rob_score-is_default": false, + "rob_score-label": "test2", + "rob_score-notes": "beep", + "rob_score-score": 14, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", diff --git a/tests/data/api/api-rob-assessment-full-export.json b/tests/data/api/api-rob-assessment-full-export.json index 52ac6c882d..46dc3c952b 100644 --- a/tests/data/api/api-rob-assessment-full-export.json +++ b/tests/data/api/api-rob-assessment-full-export.json @@ -3,23 +3,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2020-05-08T13:56:45.599914-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2020-05-08T13:56:45.599914-0400", "rob-final": false, "rob-id": 4, - "rob-last_updated": "2020-05-08T15:33:23.187120-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Definitely low risk of bias", - "rob-score_id": 10, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "test", - "rob-score_score": 17, + "rob-last_updated": "2020-05-08T15:33:23.187120-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Definitely low risk of bias", + "rob_score-id": 10, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "test", + "rob_score-score": 17, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -46,23 +46,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2020-05-08T13:56:45.599914-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2020-05-08T13:56:45.599914-0400", "rob-final": false, "rob-id": 4, - "rob-last_updated": "2020-05-08T15:33:23.187120-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Definitely high risk of bias", - "rob-score_id": 11, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "test2", - "rob-score_score": 14, + "rob-last_updated": "2020-05-08T15:33:23.187120-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Definitely high risk of bias", + "rob_score-id": 11, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "test2", + "rob_score-score": 14, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -89,23 +89,23 @@ "rob-active": true, "rob-author_id": 2, "rob-author_name": "Project Manager", - "rob-created": "2020-05-08T13:56:45.903109-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2020-05-08T13:56:45.903109-0400", "rob-final": false, "rob-id": 5, - "rob-last_updated": "2020-05-08T15:33:43.045401-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Probably low risk of bias", - "rob-score_id": 12, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "test", - "rob-score_score": 16, + "rob-last_updated": "2020-05-08T15:33:43.045401-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Probably low risk of bias", + "rob_score-id": 12, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "test", + "rob_score-score": 16, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -132,23 +132,23 @@ "rob-active": true, "rob-author_id": 2, "rob-author_name": "Project Manager", - "rob-created": "2020-05-08T13:56:45.903109-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2020-05-08T13:56:45.903109-0400", "rob-final": false, "rob-id": 5, - "rob-last_updated": "2020-05-08T15:33:43.045401-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Definitely low risk of bias", - "rob-score_id": 13, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "test", - "rob-score_score": 17, + "rob-last_updated": "2020-05-08T15:33:43.045401-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Definitely low risk of bias", + "rob_score-id": 13, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "test", + "rob_score-score": 17, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -175,23 +175,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2020-05-08T13:56:46.183552-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2020-05-08T13:56:46.183552-0400", "rob-final": true, "rob-id": 6, - "rob-last_updated": "2020-05-08T15:34:31.468702-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Probably low risk of bias", - "rob-score_id": 14, - "rob-score_is_default": true, - "rob-score_label": "test1", - "rob-score_notes": "test", - "rob-score_score": 16, + "rob-last_updated": "2020-05-08T15:34:31.468702-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Probably low risk of bias", + "rob_score-id": 14, + "rob_score-is_default": true, + "rob_score-label": "test1", + "rob_score-notes": "test", + "rob_score-score": 16, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -218,23 +218,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2020-05-08T13:56:46.183552-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2020-05-08T13:56:46.183552-0400", "rob-final": true, "rob-id": 6, - "rob-last_updated": "2020-05-08T15:34:31.468702-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Definitely high risk of bias", - "rob-score_id": 16, - "rob-score_is_default": false, - "rob-score_label": "test2", - "rob-score_notes": "beep", - "rob-score_score": 14, + "rob-last_updated": "2020-05-08T15:34:31.468702-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Probably low risk of bias", + "rob_score-id": 15, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "test", + "rob_score-score": 16, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -261,23 +261,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2020-05-08T13:56:46.183552-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2020-05-08T13:56:46.183552-0400", "rob-final": true, "rob-id": 6, - "rob-last_updated": "2020-05-08T15:34:31.468702-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 1, - "rob-score_description": "Probably low risk of bias", - "rob-score_id": 15, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "test", - "rob-score_score": 16, + "rob-last_updated": "2020-05-08T15:34:31.468702-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 1, + "rob_score-description": "Definitely high risk of bias", + "rob_score-id": 16, + "rob_score-is_default": false, + "rob_score-label": "test2", + "rob_score-notes": "beep", + "rob_score-score": 14, "study-ask_author": "not really (example)", "study-bioassay": true, "study-coi_details": "J.B., H.S., J.A., S.J., M.H. and T.S. are employed by specialty chemical manufacturers whose product lines include brominated flame retardants. M.B., N.M., A.R., D.W.S. and D.G.S. are employed by WIL Research Laboratories, a contract research organization commissioned to conduct the guideline DNT study presented herein. L.F. is employed with BioSTAT Consultants and was commissioned to design and evaluate the statistical aspects of the DNT study. The views and opinions expressed in this article are those of the authors and not necessarily those of their respective employers.", @@ -304,23 +304,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2021-09-02T11:57:26.492990-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2021-09-02T11:57:26.492990-0400", "rob-final": false, "rob-id": 7, - "rob-last_updated": "2021-09-02T11:57:26.733132-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Not reported", - "rob-score_id": 17, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "", - "rob-score_score": 12, + "rob-last_updated": "2021-09-02T11:57:26.733132-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Not reported", + "rob_score-id": 17, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "", + "rob_score-score": 12, "study-ask_author": "", "study-bioassay": false, "study-coi_details": "No COI", @@ -347,23 +347,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2021-09-02T11:57:26.492990-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2021-09-02T11:57:26.492990-0400", "rob-final": false, "rob-id": 7, - "rob-last_updated": "2021-09-02T11:57:26.733132-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Not reported", - "rob-score_id": 18, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "", - "rob-score_score": 12, + "rob-last_updated": "2021-09-02T11:57:26.733132-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Not reported", + "rob_score-id": 18, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "", + "rob_score-score": 12, "study-ask_author": "", "study-bioassay": false, "study-coi_details": "No COI", @@ -390,23 +390,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2021-09-02T11:57:27.448945-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2021-09-02T11:57:27.448945-0400", "rob-final": false, "rob-id": 8, - "rob-last_updated": "2021-09-02T11:57:27.851056-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Not reported", - "rob-score_id": 19, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "", - "rob-score_score": 12, + "rob-last_updated": "2021-09-02T11:57:27.851056-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Not reported", + "rob_score-id": 19, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "", + "rob_score-score": 12, "study-ask_author": "", "study-bioassay": false, "study-coi_details": "", @@ -433,23 +433,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2021-09-02T11:57:27.448945-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2021-09-02T11:57:27.448945-0400", "rob-final": false, "rob-id": 8, - "rob-last_updated": "2021-09-02T11:57:27.851056-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Not reported", - "rob-score_id": 20, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "", - "rob-score_score": 12, + "rob-last_updated": "2021-09-02T11:57:27.851056-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Not reported", + "rob_score-id": 20, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "", + "rob_score-score": 12, "study-ask_author": "", "study-bioassay": false, "study-coi_details": "", @@ -476,23 +476,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2021-09-02T11:57:28.629575-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 10, - "rob-domain_name": "domain 1", + "rob-created": "2021-09-02T11:57:28.629575-0400", "rob-final": false, "rob-id": 9, - "rob-last_updated": "2021-09-02T11:57:29.033255-04:00", - "rob-metric_description": "

description

", - "rob-metric_id": 14, - "rob-metric_name": "metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Not reported", - "rob-score_id": 21, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "", - "rob-score_score": 12, + "rob-last_updated": "2021-09-02T11:57:29.033255-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 10, + "rob_domain-name": "domain 1", + "rob_metric-description": "

description

", + "rob_metric-id": 14, + "rob_metric-name": "metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Not reported", + "rob_score-id": 21, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "", + "rob_score-score": 12, "study-ask_author": "", "study-bioassay": false, "study-coi_details": "", @@ -519,23 +519,23 @@ "rob-active": true, "rob-author_id": 3, "rob-author_name": "Team Member", - "rob-created": "2021-09-02T11:57:28.629575-04:00", - "rob-domain_description": "

description

", - "rob-domain_id": 11, - "rob-domain_name": "overall", + "rob-created": "2021-09-02T11:57:28.629575-0400", "rob-final": false, "rob-id": 9, - "rob-last_updated": "2021-09-02T11:57:29.033255-04:00", - "rob-metric_description": "

overall description

", - "rob-metric_id": 15, - "rob-metric_name": "overall metric 1", - "rob-score_bias_direction": 0, - "rob-score_description": "Not reported", - "rob-score_id": 22, - "rob-score_is_default": true, - "rob-score_label": "", - "rob-score_notes": "", - "rob-score_score": 12, + "rob-last_updated": "2021-09-02T11:57:29.033255-0400", + "rob_domain-description": "

description

", + "rob_domain-id": 11, + "rob_domain-name": "overall", + "rob_metric-description": "

overall description

", + "rob_metric-id": 15, + "rob_metric-name": "overall metric 1", + "rob_score-bias_direction": 0, + "rob_score-description": "Not reported", + "rob_score-id": 22, + "rob_score-is_default": true, + "rob_score-label": "", + "rob_score-notes": "", + "rob_score-score": 12, "study-ask_author": "", "study-bioassay": false, "study-coi_details": "", diff --git a/tests/hawc/apps/animal/test_exports.py b/tests/hawc/apps/animal/test_exports.py index 4b1f617bbe..d773455223 100644 --- a/tests/hawc/apps/animal/test_exports.py +++ b/tests/hawc/apps/animal/test_exports.py @@ -1,107 +1,28 @@ -from hawc.apps.animal.constants import DataType -from hawc.apps.animal.exports import EndpointFlatDataPivot, get_significance_and_direction +import pandas as pd +import pytest +from pandas.testing import assert_series_equal - -def test_get_significance_and_direction(): - # no data - resp = get_significance_and_direction( - DataType.CONTINUOUS, - [], - ) - assert resp == [] - - # continuous - resp = get_significance_and_direction( - DataType.CONTINUOUS, - [ - dict(significant=False, response=0), - dict(significant=False, response=1), - dict(significant=True, response=0), - dict(significant=True, response=-1), - dict(significant=True, response=1), - ], - ) - assert resp == ["No", "No", "Yes - ?", "Yes - ↓", "Yes - ↑"] - - # dichotomous - resp = get_significance_and_direction( - DataType.DICHOTOMOUS, - [ - dict(percent_affected=0, significant=False), - dict(percent_affected=10, significant=False), - dict(percent_affected=20, significant=True), - ], - ) - assert resp == ["No", "No", "Yes - ↑"] - - resp = get_significance_and_direction( - DataType.DICHOTOMOUS_CANCER, - [ - dict(percent_affected=50, significant=False), - dict(percent_affected=40, significant=False), - dict(percent_affected=30, significant=True), - ], - ) - assert resp == ["No", "No", "Yes - ↓"] - - # percent diff - resp = get_significance_and_direction( - DataType.CONTINUOUS, - [ - dict(significant=False, response=0), - 
dict(significant=False, response=0), - dict(significant=True, response=0), - dict(significant=True, response=-1), - dict(significant=True, response=1), - ], - ) - assert resp == ["No", "No", "Yes - ?", "Yes - ↓", "Yes - ↑"] +from hawc.apps.animal import exports +from hawc.apps.animal.models import Endpoint +@pytest.mark.django_db class TestEndpointFlatDataPivot: - def test_dose_low_high(self): - # returns a tuple of the lowest non-zero dose - # and the highest dose - func = EndpointFlatDataPivot._dose_low_high - - # all of these doses are present - valid_doses = [0.0, 1.0, 20.0, 300.0] - (low, high) = func(valid_doses) - assert low == 1.0 and high == 300.0 - - # if a dose is not present, it will be None - one_invalid_dose = [0.0, 1.0, None, 300.0] - (low, high) = func(one_invalid_dose) - assert low == 1.0 and high == 300.0 - - # missing doses can affect lowest dose - invalid_low_dose = [0.0, None, 20.0, 300.0] - (low, high) = func(invalid_low_dose) - assert low == 20.0 and high == 300.0 - - # missing doses can affect highest dose - invalid_high_dose = [0.0, 1.0, 20.0, None] - (low, high) = func(invalid_high_dose) - assert low == 1.0 and high == 20.0 - - # if only one valid dose, it will be both lowest and highest - one_valid_dose = [0.0, None, 20.0, None] - (low, high) = func(one_valid_dose) - assert low == 20.0 and high == 20.0 - - # if no valid dose, lowest and highest is None - invalid_doses = [0.0, None, None, None] - (low, high) = func(invalid_doses) - assert low is None and high is None - - def test_dose_is_reported(self): - func = EndpointFlatDataPivot._dose_is_reported - - # check that dose is reported even when value is falsy but not None - assert func(1, [dict(dose_group_id=1, n=0)]) is True - assert func(1, [dict(dose_group_id=1, response=0)]) is True - assert func(1, [dict(dose_group_id=1, incidence=0)]) is True - - assert func(1, []) is False - assert func(1, [dict(dose_group_id=1)]) is False - assert func(1, [dict(dose_group_id=1, n=None, 
response=None, incidence=None)]) is False + def test_handle_treatment_period(self): + # list of tuples, first is inputs, second is outputs + # inputs: (type_display, duration_exposure_text) + # expected outputs: treatment period + expected = [ + (("1-generation reproductive", ""), "1-generation reproductive"), + (("Short-term (1-30 days)", ""), "short-term"), + (("Short-term (1-30 days)", "30 days"), "short-term (30 days)"), + ] + df = pd.DataFrame( + data=[el[0] for el in expected], + columns=["experiment-type_display", "dosing_regime-duration_exposure_text"], + ) + expected_output = pd.Series(data=[el[1] for el in expected], name="treatment period") + + exporter = exports.EndpointFlatDataPivot(queryset=Endpoint.objects.none()) + df2 = exporter.handle_treatment_period(df) + assert_series_equal(df2["treatment period"], expected_output) diff --git a/tests/hawc/apps/common/test_helper.py b/tests/hawc/apps/common/test_helper.py index e684d937c2..6ec8c7b70f 100644 --- a/tests/hawc/apps/common/test_helper.py +++ b/tests/hawc/apps/common/test_helper.py @@ -35,6 +35,20 @@ def test_get_flattened_tags(self): ) +@pytest.mark.parametrize( + "kw,expected", + [ + [dict(items=list("abcde"), target="c", after="b", n_cols=2), "abcde"], + [dict(items=list("abcde"), target="b", after=None), "bacde"], + [dict(items=list("abcde"), target="c", after="a", n_cols=2), "acdbe"], + [dict(items=list("abcde"), target="d", after="b"), "abdce"], + [dict(items=list("abcde"), target="b", after="d", n_cols=2), "adbce"], + ], +) +def test_reorder_list(kw, expected): + assert "".join(helper.reorder_list(**kw)) == expected + + def test_df_move_column(): df = pd.read_csv(StringIO("a,b,c\n1,2,3")) @@ -146,3 +160,16 @@ def test_fields(self): ) def test_flatten(input, expected): assert list(helper.flatten(input)) == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ([], []), + (["a"], ["a"]), + (["a", "a", "a"], ["a", "a (2)", "a (3)"]), + (["a", "b", "a"], ["a", "b", "a (2)"]), + ], +) 
+def test_unique_text_list(input, expected): + assert list(helper.unique_text_list(input)) == expected diff --git a/tests/hawc/apps/epi/test_api.py b/tests/hawc/apps/epi/test_api.py index bd0bb49937..23d4eece92 100644 --- a/tests/hawc/apps/epi/test_api.py +++ b/tests/hawc/apps/epi/test_api.py @@ -46,7 +46,6 @@ def test_permissions(self, db_keys): assert team_client.get(url).status_code == 200 def test_full_export(self, rewrite_data_files: bool, db_keys): - rewrite_data_files = True # published fn = "api-epi-assessment-export-unpublished-False.json" url = reverse("epi:api:assessment-export", args=(db_keys.assessment_final,)) diff --git a/tests/hawc/apps/epi/test_exports.py b/tests/hawc/apps/epi/test_exports.py new file mode 100644 index 0000000000..c66b337b40 --- /dev/null +++ b/tests/hawc/apps/epi/test_exports.py @@ -0,0 +1,48 @@ +import pandas as pd +import pytest +from numpy import nan +from pandas.testing import assert_frame_equal + +from hawc.apps.epi import exports +from hawc.apps.epi.models import Outcome + + +@pytest.mark.django_db +class TestOutcomeDataPivot: + def test_add_ci(self): + # two tuples, one is inputs, one is expected outputs + # inputs: (lower_ci, upper_ci, n, estimate, variance, variance_type) + # expected outputs: (lower_ci, upper_ci) + data = [ + # keep entered values instead of calculating + ((1.0, 2.0, None, None, None, ""), (1.0, 2.0)), + ((1.0, None, None, None, None, ""), (1.0, None)), + ((None, 2.0, None, None, None, ""), (None, 2.0)), + ((1.0, 2.0, 10, 30, 2, "SD"), (1.0, 2.0)), + # calculate + ((None, None, 1, 30, 2, "SD"), (4.59, 55.41)), + ((None, None, 10, 30, 2, "SD"), (28.56, 31.43)), + ((None, None, 10, 30, 2, "SE"), (25.48, 34.52)), + # ok bad cases + ((None, None, None, None, None, "bad"), (None, None)), + ((None, None, 10, 30, 2, "bad"), (None, None)), + ] + input_df = pd.DataFrame( + data=[el1 for el1, _ in data], + columns=[ + "result_group-lower_ci", + "result_group-upper_ci", + "result_group-n", + 
"result_group-estimate", + "result_group-variance", + "result-variance_type", + ], + ).replace({nan: None}) + expected_df = pd.DataFrame( + data=[el2 for _, el2 in data], + columns=["result_group-lower_ci", "result_group-upper_ci"], + ) + + exporter = exports.OutcomeDataPivot(queryset=Outcome.objects.none()) + output_df = exporter._add_ci(input_df)[["result_group-lower_ci", "result_group-upper_ci"]] + assert_frame_equal(output_df, expected_df, atol=0.01) diff --git a/tests/hawc/apps/riskofbias/test_exports.py b/tests/hawc/apps/riskofbias/test_exports.py index 28218c5717..1a71e45d48 100644 --- a/tests/hawc/apps/riskofbias/test_exports.py +++ b/tests/hawc/apps/riskofbias/test_exports.py @@ -3,7 +3,7 @@ from hawc.apps.common.helper import FlatExport from hawc.apps.riskofbias import exports -from hawc.apps.riskofbias.models import RiskOfBias +from hawc.apps.riskofbias.models import RiskOfBiasScore def check_metadata_accuracy(export: FlatExport): @@ -16,16 +16,14 @@ def check_metadata_accuracy(export: FlatExport): @pytest.mark.django_db class TestRiskOfBiasFlat: def test_metadata(self): - qs = RiskOfBias.objects.none() - exporter = exports.RiskOfBiasFlat(qs, filename="test", assessment_id=1) - export = exporter.build_export() + qs = RiskOfBiasScore.objects.none() + export = exports.RiskOfBiasExporter.flat_export(qs, filename="test") check_metadata_accuracy(export) @pytest.mark.django_db class TestRiskOfBiasCompleteFlat: def test_metadata(self): - qs = RiskOfBias.objects.none() - exporter = exports.RiskOfBiasCompleteFlat(qs, filename="test", assessment_id=1) - export = exporter.build_export() + qs = RiskOfBiasScore.objects.none() + export = exports.RiskOfBiasCompleteExporter.flat_export(qs, filename="test") check_metadata_accuracy(export)