From 1399b5453cd1bed38bf097437d0e4f46d8aade5e Mon Sep 17 00:00:00 2001 From: catalamarti Date: Fri, 18 Oct 2024 10:35:48 +0100 Subject: [PATCH 1/7] progress --- R/summariseClinicalRecords.R | 341 ++++++++++++++++++++------------- R/summariseInObservation.R | 91 ++++----- R/summariseObservationPeriod.R | 2 +- R/summariseRecordCount.R | 27 ++- 4 files changed, 268 insertions(+), 193 deletions(-) diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index b6f67e0..51935d9 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -55,15 +55,16 @@ summariseClinicalRecords <- function(cdm, ageGroup = NULL) { # Initial checks ---- omopgenerics::validateCdmArgument(cdm) - omopTableName |> - omopgenerics::assertChoice(choices = omopgenerics::omopTables()) + opts <- omopgenerics::omopTables() + opts <- opts[opts %in% names(cdm)] + omopgenerics::assertChoice(omopTableName, choices = opts) estimates <- PatientProfiles::availableEstimates( variableType = "numeric", fullQuantiles = TRUE) |> dplyr::pull("estimate_name") omopgenerics::assertChoice(recordsPerPerson, choices = estimates, null = TRUE) - recordsPerPerson <- unique(recordsPerPerson) + if (is.null(recordsPerPerson)) recordsPerPerson <- character() omopgenerics::assertLogical(inObservation, length = 1) omopgenerics::assertLogical(standardConcept, length = 1) @@ -71,43 +72,50 @@ summariseClinicalRecords <- function(cdm, omopgenerics::assertLogical(domainId, length = 1) omopgenerics::assertLogical(typeConcept, length = 1) omopgenerics::assertLogical(sex, length = 1) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - - result <- purrr::map(omopTableName, - function(x) { - if(omopgenerics::isTableEmpty(cdm[[x]])) { - cli::cli_warn(paste0(x, " omop table is empty. Returning an empty summarised omop table.")) - return(omopgenerics::emptySummarisedResult()) - } - summariseClinicalRecord(x, - cdm = cdm, - recordsPerPerson = recordsPerPerson, - inObservation = inObservation, - standardConcept = standardConcept, - sourceVocabulary = sourceVocabulary, - domainId = domainId, - typeConcept = typeConcept, - sex = sex, - ageGroup = ageGroup) - } - ) |> + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, multipleAgeGroup = FALSE)[[1]] + + result <- purrr::map(omopTableName, \(x) { + if(omopgenerics::isTableEmpty(cdm[[x]])) { + cli::cli_warn(paste0(x, " omop table is empty. Returning an empty summarised omop table.")) + return(omopgenerics::emptySummarisedResult()) + } + summariseClinicalRecord( + x, + cdm = cdm, + recordsPerPerson = recordsPerPerson, + inObservation = inObservation, + standardConcept = standardConcept, + sourceVocabulary = sourceVocabulary, + domainId = domainId, + typeConcept = typeConcept, + sex = sex, + ageGroup = ageGroup + ) + }) |> dplyr::bind_rows() return(result) } #' @noRd -summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, - inObservation, standardConcept, - sourceVocabulary, domainId, typeConcept, - sex, ageGroup, call = parent.frame(3)) { +summariseClinicalRecord <- function(omopTableName, + cdm, + recordsPerPerson, + inObservation, + standardConcept, + sourceVocabulary, + domainId, + typeConcept, + sex, + ageGroup, + call = parent.frame(3)) { tablePrefix <- omopgenerics::tmpPrefix() # Initial checks omopgenerics::assertClass(cdm[[omopTableName]], "omop_table", call = call) - date <- startDate(omopgenerics::tableName(cdm[[omopTableName]])) + date <- startDate(omopTableName) omopTable <- cdm[[omopTableName]] |> dplyr::ungroup() @@ -115,21 +123,21 @@ summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, omopTable <- filterPersonId(omopTable) |> addStrataToOmopTable(date, ageGroup, sex) - if ("observation_period" == omopTableName) { - if(standardConcept){ - if(!missing(standardConcept)){ + if ("observation_period" == omopTableName) { + if (standardConcept) { + if (!missing(standardConcept)) { cli::cli_inform("standardConcept turned to FALSE for observation_period OMOP table", call = call) } standardConcept <- FALSE } - if(sourceVocabulary){ - if(!missing(sourceVocabulary)){ + if (sourceVocabulary) { + if (!missing(sourceVocabulary)) { cli::cli_inform("sourceVocabulary turned to FALSE for observation_period OMOP table", call = call) } sourceVocabulary <- FALSE } - if(domainId){ - if(!missing(domainId)){ + if (domainId) { + if (!missing(domainId)) { cli::cli_inform("domainId turned to FALSE for observation_period OMOP table", call = call) } domainId <- FALSE @@ -137,32 +145,20 @@ summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, } strata <- getStrataList(sex, ageGroup) - - peopleStrata <- suppressWarnings(addStrataToPeopleInObservation(cdm, ageGroup, sex, tablePrefix)) - - people <- getNumberPeopleInCdm(cdm, strata, peopleStrata) - result <- omopgenerics::emptySummarisedResult() + strata <- c(list(character()), strata) # Counts summary ---- - cli::cli_inform(c("i" = "Summarising table counts")) - result <- result |> - addCounts(strata, omopTable) |> - addSubjectsPercentage(omopTable, people, strata) - - # Records per person summary ---- - if(!is.null(recordsPerPerson)){ - cli::cli_inform(c("i" = "Summarising records per person")) - result <- result |> - addRecordsPerPerson(omopTable, recordsPerPerson, cdm, peopleStrata, strata) - } - - denominator <- result |> - dplyr::filter(.data$variable_name == "number records") |> - dplyr::collect("strata_name", "strata_level", "estimate_value") + cli::cli_inform(c("i" = "Summarising table counts and records per person")) + result <- summariseRecordsPerPerson( + omopTable, date, sex, ageGroup, recordsPerPerson) # Summary concepts ---- if (inObservation | standardConcept | sourceVocabulary | domainId | typeConcept) { + denominator <- result |> + dplyr::filter(.data$variable_name == "number records") |> + dplyr::select("strata_name", "strata_level", "estimate_value") + variables <- columnsVariables( inObservation, standardConcept, sourceVocabulary, domainId, typeConcept ) @@ -172,12 +168,11 @@ summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, result <- result |> dplyr::bind_rows( omopTable |> - addVariables(variables, strata) |> - dplyr::group_by(dplyr::across(dplyr::all_of(variables)), .data$age_group, .data$sex) |> - dplyr::tally() |> + addVariables(variables) |> + dplyr::group_by(dplyr::across(dplyr::everything())) |> + dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> dplyr::collect() |> - dplyr::mutate("n" = as.integer(.data$n)) |> - summaryData(variables, cdm, denominator, result) + summaryData(denominator, strata) ) } @@ -203,33 +198,128 @@ summariseClinicalRecord <- function(omopTableName, cdm, recordsPerPerson, # Functions ----- getStrataList <- function(sex, ageGroup){ + omopgenerics::combineStrata(c("sex"[sex], "age_group"[!is.null(ageGroup)])) +} + +summariseRecordsPerPerson <- function(omopTable, date, sex, ageGroup, recordsPerPerson) { + # get strata + strataCols <- c("sex"[sex], "age_group"[!is.null(ageGroup)]) + + cdm <- omopgenerics::cdmReference(omopTable) + tablePrefix <- omopgenerics::tmpPrefix() + nm <- omopgenerics::uniqueTableName(tablePrefix) + + # denominator + demographics <- CohortConstructor::demographicsCohort( + cdm = cdm, name = nm, ageRange = ageGroup + ) |> + suppressMessages() + set <- omopgenerics::settings(demographics) + if (sex) demographics <- PatientProfiles::addSexQuery(demographics) + if (is.null(ageGroup)) { + set <- set |> dplyr::select("cohort_definition_id") + } else { + set <- set |> + dplyr::left_join( + dplyr::tibble( + age_group = names(ageGroup), + age_range = purrr::map_chr(ageGroup, \(x) paste0(x[1], "_", x[2])) + ), + by = "age_range" + ) |> + dplyr::mutate(age_group = dplyr::coalesce(.data$age_group, .data$age_range)) |> + dplyr::select("cohort_definition_id", "age_group") + } + + # records per person + x <- demographics |> + dplyr::select(dplyr::any_of(c( + "cohort_definition_id", "person_id" = "subject_id", "sex" + ))) |> + dplyr::collect() |> + dplyr::left_join(set, by = "cohort_definition_id") |> + dplyr::select(!"cohort_definition_id") |> + dplyr::left_join( + omopTable |> + dplyr::group_by(dplyr::across(dplyr::all_of(c("person_id", strataCols)))) |> + dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> + dplyr::collect(), + by = c("person_id", strataCols) + ) |> + dplyr::mutate(n = dplyr::coalesce(.data$n, 0L)) + + omopgenerics::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) + + result <- list() - strata <- as.character() + result[["overall"]] <- summariseCounts(x, character(), recordsPerPerson) - if(!is.null(ageGroup)){ - strata <- append(strata, "age_group") + if (!is.null(ageGroup)) { + result[["age_group"]] <- x |> + summariseCounts(c("age_group"), recordsPerPerson) } - if(sex){ - strata <- append(strata, "sex") + if (sex) { + result[["sex"]] <- x |> + summariseCounts(c("sex"), recordsPerPerson) } - strata <- omopgenerics::combineStrata(levels = strata) - return(strata) + if (!is.null(ageGroup) & sex) { + result[["age_group_sex"]] <- x |> + summariseCounts(c("age_group", "sex"), recordsPerPerson) + } + + result <- result |> + dplyr::bind_rows() |> + dplyr::mutate( + variable_name = dplyr::if_else( + .data$variable_name == "n", + dplyr::if_else(.data$estimate_name == "sum", "number records", "records_per_person"), + .data$variable_name + ), + estimate_name = dplyr::if_else( + .data$variable_name == "number records", "count", .data$estimate_name + ) + ) + + return(result) +} +summariseCounts <- function(x, strata, recordsPerPerson) { + x |> + dplyr::group_by(dplyr::across(dplyr::all_of(c("person_id", strata)))) |> + dplyr::summarise(n = sum(.data$n), .groups = "drop") |> + dplyr::mutate(number_subjects = dplyr::if_else(.data$n == 0, 0L, 1L)) |> + dplyr::select(!"person_id") |> + PatientProfiles::summariseResult( + group = character(), + includeOverallGroup = FALSE, + strata = strata, + includeOverallStrata = FALSE, + counts = FALSE, + variables = list("number_subjects", "n"), + estimates = list(c("count", "percentage"), c(recordsPerPerson, "sum")) + ) |> + suppressMessages() } -getNumberPeopleInCdm <- function(cdm, strata, peopleStrata){ +getNumberPeopleInCdm <- function(cdm, ageGroup, sex, strata) { + tablePrefix <- omopgenerics::tmpPrefix() - peopleStrata |> - dplyr::select(-c("observation_period_start_date","observation_period_end_date")) |> - dplyr::inner_join(cdm[["person"]] |> dplyr::select("person_id"), by = "person_id") |> + x <- cdm |> + addStrataToPeopleInObservation(ageGroup, sex, tablePrefix) |> dplyr::collect() |> # https://github.com/darwin-eu-dev/PatientProfiles/issues/706 - PatientProfiles::summariseResult(strata = strata, - includeOverallStrata = TRUE, - counts = TRUE, - estimates = c("")) |> + PatientProfiles::summariseResult( + strata = strata, + includeOverallStrata = TRUE, + counts = TRUE, + estimates = character() + ) |> suppressMessages() |> dplyr::filter(.data$variable_name != "number records") + + omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) + + return(x) } addCounts <- function(result, strata, omopTable){ @@ -302,7 +392,7 @@ addRecordsPerPerson <- function(result, omopTable, recordsPerPerson, cdm, people ) } -addVariables <- function(x, variables, strata) { +addVariables <- function(x, variables) { name <- omopgenerics::tableName(x) @@ -320,7 +410,7 @@ addVariables <- function(x, variables, strata) { cdm <- omopgenerics::cdmReference(x) x <- x |> - dplyr::select(dplyr::all_of(newNames), "age_group", "sex") + dplyr::select(dplyr::all_of(newNames), dplyr::any_of(c("age_group", "sex"))) # Domain and standard ---- if (any(c("domain_id", "standard") %in% variables)) { @@ -381,27 +471,9 @@ addVariables <- function(x, variables, strata) { } x <- x |> - dplyr::select(dplyr::all_of(variables), "age_group", "sex") |> + dplyr::select(dplyr::all_of(variables), dplyr::any_of(c("age_group", "sex"))) |> dplyr::mutate(dplyr::across(dplyr::everything(), ~as.character(.))) - # Create overall groups - This chunk will need efficiency improvement - if(length(strata) == 3){ - x <- x |> - dplyr::union_all( - x |> - dplyr::mutate(age_group = "overall") - ) |> - dplyr::union_all( - x |> - dplyr::mutate(sex = "overall") - ) |> - dplyr::union_all( - x |> - dplyr::mutate(sex = "overall") |> - dplyr::mutate(age_group = "overall") - ) - - } return(x) } @@ -411,7 +483,10 @@ columnsVariables <- function(inObservation, standardConcept, sourceVocabulary, d )] } -summaryData <- function(x, variables, cdm, denominator, result) { +summaryData <- function(x, denominator, strata) { + + variables <- colnames(x)[!c("age_group", "sex") %in% colnames(x)] + results <- list() # in observation ---- @@ -420,7 +495,7 @@ summaryData <- function(x, variables, cdm, denominator, result) { dplyr::mutate("in_observation" = dplyr::if_else( !is.na(.data$in_observation), "Yes", "No" )) |> - formatResults("In observation", "in_observation", denominator, result) + formatResults("In observation", "in_observation", denominator, strata) } # standard ----- @@ -488,47 +563,47 @@ summaryData <- function(x, variables, cdm, denominator, result) { return(results) } -formatResults <- function(x, variableName, variableLevel, denominator, result) { +formatResults <- function(x, variableName, variableLevel, denominator, strata) { denominator <- denominator |> dplyr::select("strata_name", "strata_level", "denominator" = "estimate_value") |> visOmopResults::splitStrata() - if(!"age_group" %in% colnames(denominator)){ - denominator <- denominator |> - dplyr::mutate("age_group" = "overall") - } + strataCols <- unique(unlist(strata)) - if(!"sex" %in% colnames(denominator)){ - denominator <- denominator |> - dplyr::mutate("sex" = "overall") + result <- list() + for (strat in strata) { + res <- x |> + dplyr::group_by(dplyr::across(dplyr::all_of(c(variableLevel, strat)))) |> + dplyr::summarise("count" = sum(.data$n), .groups = "drop") + for (col in strataCols) { + if (!col %in% colnames(res)) { + res <- res |> dplyr::mutate(!!col := "overall") + } + } + result[[paste0(strat, collapse = "_")]] <- res |> + dplyr::inner_join(denominator, by = strataCols) |> + dplyr::mutate("percentage" = 100 * .data$count / as.numeric(.data$denominator)) |> + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> + tidyr::pivot_longer( + cols = c("count", "percentage"), + names_to = "estimate_name", + values_to = "estimate_value" + ) |> + dplyr::mutate( + "variable_name" = .env$variableName, + "variable_level" = as.character(.data[[variableLevel]]), + "estimate_type" = dplyr::if_else( + .data$estimate_name == "count", "integer", "percentage" + ) + ) |> + visOmopResults::uniteStrata(cols = strataCols) |> + dplyr::select( + "strata_name", "strata_level", "variable_name", "variable_level", + "estimate_name", "estimate_type", "estimate_value" + ) |> + dplyr::ungroup() } - x |> - dplyr::group_by(dplyr::across(dplyr::all_of(c(variableLevel,"age_group","sex")))) |> - dplyr::summarise("count" = sum(.data$n), .groups = "drop") |> - dplyr::inner_join( - denominator, - by = c("age_group","sex") - ) |> - dplyr::mutate("percentage" = 100 * .data$count / as.numeric(.data$denominator)) |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - tidyr::pivot_longer( - cols = c("count", "percentage"), - names_to = "estimate_name", - values_to = "estimate_value" - ) |> - dplyr::mutate( - "variable_name" = .env$variableName, - "variable_level" = as.character(.data[[variableLevel]]), - "estimate_type" = dplyr::if_else( - .data$estimate_name == "count", "integer", "percentage" - ) - ) |> - visOmopResults::uniteStrata(cols = c("age_group","sex")) |> - dplyr::select( - "strata_name", "strata_level", "variable_name", "variable_level", - "estimate_name", "estimate_type", "estimate_value" - ) |> - dplyr::ungroup() + dplyr::bind_rows(result) } diff --git a/R/summariseInObservation.R b/R/summariseInObservation.R index 4345bc6..2b90b99 100644 --- a/R/summariseInObservation.R +++ b/R/summariseInObservation.R @@ -258,55 +258,56 @@ createSummarisedResultObservationPeriod <- function(result, observationPeriod, n return(result) } -addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix){ - demographics_table <- suppressWarnings(suppressMessages( - cdm |> - CohortConstructor::demographicsCohort(name = paste0(tablePrefix, "demographics_table"), - sex = NULL, - ageRange = ageGroup, - minPriorObservation = NULL) - )) - - if(is.null(ageGroup)){ - demographics <- demographics_table |> - dplyr::rename("observation_period_start_date" = "cohort_start_date", - "observation_period_end_date" = "cohort_end_date", - "person_id" = "subject_id") |> - dplyr::select(-c("cohort_definition_id")) |> - dplyr::mutate("age_group" = "overall") |> - dplyr::compute(temporary = FALSE, name = paste0(tablePrefix, "demographics")) - }else{ - age_tibble <- dplyr::tibble( - "age_range" = gsub(",","_",gsub("\\)","",gsub("c\\(","",gsub(" ","",ageGroup)))), - "age_group" = names(ageGroup) - ) +addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix) { + demographics <- cdm |> + CohortConstructor::demographicsCohort( + name = paste0(tablePrefix, "demographics_table"), + sex = NULL, + ageRange = ageGroup, + minPriorObservation = NULL + ) |> + suppressMessages() + + if (sex) { + demographics <- demographics |> + PatientProfiles::addSexQuery() + } else { + demographics <- demographics |> + dplyr::mutate("sex" = "overall") + } - settings <- demographics_table |> - CDMConnector::settings() |> - dplyr::inner_join(age_tibble, by = "age_range") |> - dplyr::select("cohort_definition_id","age_group") - - cdm <- cdm |> - omopgenerics::insertTable(name = paste0(tablePrefix, "settings"), table = settings) - - demographics <- demographics_table |> - dplyr::inner_join(cdm[[paste0(tablePrefix,"settings")]], by = "cohort_definition_id") |> - dplyr::rename("observation_period_start_date" = "cohort_start_date", - "observation_period_end_date" = "cohort_end_date", - "person_id" = "subject_id") |> - dplyr::select(-c("cohort_definition_id")) |> - dplyr::inner_join( - cdm[["person"]] |> dplyr::select("person_id"), by = "person_id" + if (!is.null(ageGroup)) { + set <- omopgenerics::settings(demographics) |> + dplyr::select("cohort_definition_id", dplyr::any_of("age_range")) + set <- set |> + dplyr::left_join( + dplyr::tibble( + "age_range" = purrr::map_chr(ageGroup, \(x) paste0(x[1], "_", x[2])), + "age_group" = names(ageGroup) + ), + by = "age_range" ) |> - dplyr::compute(name = paste0(tablePrefix, "demographics"), temporary = FALSE) + dplyr::mutate("age_group" = dplyr::if_else( + is.na(.data$age_group), .data$age_range, .data$age_group + )) |> + dplyr::select(!"age_range") + nm <- paste0(tablePrefix, "_settings") + cdm <- omopgenerics::insertTable(cdm = cdm, name = nm, table = set) + demographics <- demographics |> + dplyr::left_join(cdm[[nm]], by = "cohort_deinition_id") + } else { + demographics <- demographics |> + dplyr::mutate("age_group" = "overall") } - - if(sex){ - demographics <- demographics |> PatientProfiles::addSexQuery() - }else{ - demographics <- demographics |> dplyr::mutate(sex = "overall") - } + nm <- paste0(tablePrefix, "_demographics") + demographics <- demographics |> + dplyr::select( + "observation_period_start_date" = "cohort_start_date", + "observation_period_end_date" = "cohort_end_date", + "person_id" = "subject_id", "age_group", "sex" + ) |> + dplyr::compute(name = nm, temporary = FALSE) return(demographics) } diff --git a/R/summariseObservationPeriod.R b/R/summariseObservationPeriod.R index 68f2b62..81a0dd6 100644 --- a/R/summariseObservationPeriod.R +++ b/R/summariseObservationPeriod.R @@ -32,7 +32,7 @@ summariseObservationPeriod <- function(observationPeriod, "median", "q75", "q95", "max", "density"), ageGroup = NULL, - sex = FALSE){ + sex = FALSE) { # input checks omopgenerics::assertClass(observationPeriod, class = "omop_table") omopgenerics::assertTrue(omopgenerics::tableName(observationPeriod) == "observation_period") diff --git a/R/summariseRecordCount.R b/R/summariseRecordCount.R index 8c3db97..0209ab3 100644 --- a/R/summariseRecordCount.R +++ b/R/summariseRecordCount.R @@ -132,20 +132,19 @@ filterPersonId <- function(omopTable){ return(omopTable) } -addStrataToOmopTable <- function(omopTable, date, ageGroup, sex){ - suppressWarnings(omopTable |> - dplyr::mutate(sex = "overall") |> - dplyr::mutate(age_group = "overall") |> - PatientProfiles::addDemographicsQuery(indexDate = date, - age = FALSE, - ageGroup = ageGroup, - missingAgeGroupValue = "unknown", - sex = sex, - missingSexValue = "unknown", - priorObservation = FALSE, - futureObservation = FALSE, - dateOfBirth = FALSE)) - +addStrataToOmopTable <- function(omopTable, date, ageGroup, sex) { + omopTable |> + PatientProfiles::addDemographicsQuery( + indexDate = date, + age = FALSE, + ageGroup = ageGroup, + missingAgeGroupValue = "unknown", + sex = sex, + missingSexValue = "unknown", + priorObservation = FALSE, + futureObservation = FALSE, + dateOfBirth = FALSE + ) } filterInObservation <- function(x, indexDate){ From 57175ec77536425cb535933ba7647c099ce22e5c Mon Sep 17 00:00:00 2001 From: catalamarti Date: Fri, 18 Oct 2024 13:58:19 +0100 Subject: [PATCH 2/7] Update summariseClinicalRecords.R --- R/summariseClinicalRecords.R | 38 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index 51935d9..46d5102 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -178,11 +178,13 @@ summariseClinicalRecord <- function(omopTableName, # Format output as a summarised result result <- result |> - tidyr::fill("result_id", "cdm_name", "group_name", "group_level", - "additional_name", "additional_level", .direction = "downup") |> dplyr::mutate( + "result_id" = 1L, + "cdm_name" = omopgenerics::cdmName(cdm), "group_name" = "omop_table", - "group_level" = omopgenerics::tableName(omopTable) + "group_level" = omopTableName, + "additional_name" = "overall", + "additional_level" = "overall" ) |> omopgenerics::newSummarisedResult(settings = dplyr::tibble( "result_id" = 1L, @@ -485,39 +487,41 @@ columnsVariables <- function(inObservation, standardConcept, sourceVocabulary, d summaryData <- function(x, denominator, strata) { - variables <- colnames(x)[!c("age_group", "sex") %in% colnames(x)] + cols <- colnames(x) results <- list() # in observation ---- - if ("in_observation" %in% variables) { + if ("in_observation" %in% cols) { results[["obs"]] <- x |> dplyr::mutate("in_observation" = dplyr::if_else( - !is.na(.data$in_observation), "Yes", "No" + .data$in_observation == "1", "Yes", "No" )) |> formatResults("In observation", "in_observation", denominator, strata) } # standard ----- - if ("standard" %in% variables) { + if ("standard" %in% cols) { results[["standard"]] <- x |> - formatResults("Standard concept", "standard", denominator, result) + formatResults("Standard concept", "standard", denominator, strata) } # source ---- - if ("source" %in% variables) { - results[["source"]] <- x |> formatResults("Source vocabulary", "source", denominator, result) + if ("source" %in% cols) { + results[["source"]] <- x |> + formatResults("Source vocabulary", "source", denominator, strata) } # domain ---- - if ("domain_id" %in% variables) { - results[["domain"]] <- x |> formatResults("Domain", "domain_id", denominator, result) + if ("domain_id" %in% cols) { + results[["domain"]] <- x |> + formatResults("Domain", "domain_id", denominator, strata) } # type ---- - if ("type" %in% variables) { + if ("type" %in% cols) { xx <- x |> - formatResults("Type concept id", "type", denominator, result) |> + formatResults("Type concept id", "type", denominator, strata) |> dplyr::left_join( conceptTypes |> dplyr::select( @@ -554,11 +558,11 @@ summaryData <- function(x, denominator, strata) { paste0(.data$new_variable_level, " (", .data$variable_level, ")") )) } - results[["type"]] <- xx |> dplyr::select(-"new_variable_level") + results[["type"]] <- xx |> + dplyr::select(-"new_variable_level") } - results <- results |> - dplyr::bind_rows() + results <- dplyr::bind_rows(results) return(results) } From 55fd2516f273f57fdb55eed93ef209d58c7b6bea Mon Sep 17 00:00:00 2001 From: catalamarti Date: Fri, 18 Oct 2024 16:26:45 +0100 Subject: [PATCH 3/7] Update summariseClinicalRecords.R --- R/summariseClinicalRecords.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index 46d5102..52090d6 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -92,7 +92,7 @@ summariseClinicalRecords <- function(cdm, ageGroup = ageGroup ) }) |> - dplyr::bind_rows() + omopgenerics::bind() return(result) } @@ -148,7 +148,7 @@ summariseClinicalRecord <- function(omopTableName, strata <- c(list(character()), strata) # Counts summary ---- - cli::cli_inform(c("i" = "Summarising table counts and records per person")) + cli::cli_inform(c("i" = "Summarising {.pkg {omopTableName}} counts and records per person")) result <- summariseRecordsPerPerson( omopTable, date, sex, ageGroup, recordsPerPerson) @@ -163,7 +163,7 @@ summariseClinicalRecord <- function(omopTableName, inObservation, standardConcept, sourceVocabulary, domainId, typeConcept ) - cli::cli_inform(c("i" = "Summarising {variables} information")) + cli::cli_inform(c("i" = "Summarising {.pkg {omopTableName}}: {.var {variables}}.")) result <- result |> dplyr::bind_rows( @@ -172,7 +172,7 @@ summariseClinicalRecord <- function(omopTableName, dplyr::group_by(dplyr::across(dplyr::everything())) |> dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop") |> dplyr::collect() |> - summaryData(denominator, strata) + summaryData(denominator, strata, cdm) ) } @@ -485,7 +485,7 @@ columnsVariables <- function(inObservation, standardConcept, sourceVocabulary, d )] } -summaryData <- function(x, denominator, strata) { +summaryData <- function(x, denominator, strata, cdm) { cols <- colnames(x) From 772f0152e018a4ef71ae190dba252227826aab59 Mon Sep 17 00:00:00 2001 From: catalamarti Date: Fri, 25 Oct 2024 11:36:55 +0100 Subject: [PATCH 4/7] solve counts --- R/summariseClinicalRecords.R | 1 + .../testthat/test-summariseClinicalRecords.R | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index 52090d6..f78d56e 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -238,6 +238,7 @@ summariseRecordsPerPerson <- function(omopTable, date, sex, ageGroup, recordsPer dplyr::select(dplyr::any_of(c( "cohort_definition_id", "person_id" = "subject_id", "sex" ))) |> + dplyr::distinct() |> dplyr::collect() |> dplyr::left_join(set, by = "cohort_definition_id") |> dplyr::select(!"cohort_definition_id") |> diff --git a/tests/testthat/test-summariseClinicalRecords.R b/tests/testthat/test-summariseClinicalRecords.R index 82b65cf..f2b76f4 100644 --- a/tests/testthat/test-summariseClinicalRecords.R +++ b/tests/testthat/test-summariseClinicalRecords.R @@ -193,14 +193,17 @@ test_that("summariseClinicalRecords() sex and ageGroup argument work", { cdm <- CDMConnector::copyCdmTo( con = connection(), cdm = cdm, schema = schema()) - result <- summariseClinicalRecords(cdm, "observation_period", - inObservation = FALSE, - standardConcept = FALSE, - sourceVocabulary = FALSE, - domainId = FALSE, - typeConcept = FALSE, - sex = TRUE, - ageGroup = list("old" = c(10, Inf), "young" = c(0, 9))) + result <- summariseClinicalRecords( + cdm = cdm, + omopTableName = "observation_period", + inObservation = FALSE, + standardConcept = FALSE, + sourceVocabulary = FALSE, + domainId = FALSE, + typeConcept = FALSE, + sex = TRUE, + ageGroup = list("old" = c(10, Inf), "young" = c(0, 9)) + ) # Check num records records <- result |> From b49a8816ab125daf2a52db335ba87c2bde5ba624 Mon Sep 17 00:00:00 2001 From: catalamarti Date: Fri, 25 Oct 2024 16:08:02 +0100 Subject: [PATCH 5/7] typo --- R/summariseInObservation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/summariseInObservation.R b/R/summariseInObservation.R index 2b90b99..a91aa6f 100644 --- a/R/summariseInObservation.R +++ b/R/summariseInObservation.R @@ -294,7 +294,7 @@ addStrataToPeopleInObservation <- function(cdm, ageGroup, sex, tablePrefix) { nm <- paste0(tablePrefix, "_settings") cdm <- omopgenerics::insertTable(cdm = cdm, name = nm, table = set) demographics <- demographics |> - dplyr::left_join(cdm[[nm]], by = "cohort_deinition_id") + dplyr::left_join(cdm[[nm]], by = "cohort_definition_id") } else { demographics <- demographics |> dplyr::mutate("age_group" = "overall") From ea43a55a0ecedeb73de24a0ac106c0705ff13f3d Mon Sep 17 00:00:00 2001 From: catalamarti Date: Mon, 28 Oct 2024 10:03:45 +0000 Subject: [PATCH 6/7] Update DESCRIPTION --- DESCRIPTION | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3b4b93f..95671c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,7 +58,7 @@ Imports: dplyr, ggplot2, omopgenerics (>= 0.3.1), - PatientProfiles (>= 1.2.0.900), + PatientProfiles (>= 1.2.1), purrr, rlang, stringr, @@ -71,5 +71,4 @@ URL: https://OHDSI.github.io/OmopSketch/ BugReports: https://github.com/OHDSI/OmopSketch/issues VignetteBuilder: knitr Remotes: - darwin-eu-dev/omopgenerics@mah_vl, - darwin-eu-dev/PatientProfiles@issue_705 + darwin-eu-dev/omopgenerics@mah_vl From 97136423506ab847f9e8c3ebccca319602c8d780 Mon Sep 17 00:00:00 2001 From: catalamarti Date: Mon, 28 Oct 2024 10:24:34 +0000 Subject: [PATCH 7/7] strata order --- R/summariseClinicalRecords.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/summariseClinicalRecords.R b/R/summariseClinicalRecords.R index a8a4a21..423f666 100644 --- a/R/summariseClinicalRecords.R +++ b/R/summariseClinicalRecords.R @@ -200,7 +200,7 @@ summariseClinicalRecord <- function(omopTableName, # Functions ----- getStrataList <- function(sex, ageGroup){ - omopgenerics::combineStrata(c("sex"[sex], "age_group"[!is.null(ageGroup)])) + omopgenerics::combineStrata(c("age_group"[!is.null(ageGroup)], "sex"[sex])) } summariseRecordsPerPerson <- function(omopTable, date, sex, ageGroup, recordsPerPerson) {