Skip to content

Commit

Permalink
Merge pull request #221 from OHDSI/issue_197
Browse files Browse the repository at this point in the history
Issue 197
  • Loading branch information
martaalcalde authored Nov 5, 2024
2 parents aa73e5d + 5af3d30 commit dcb0d61
Show file tree
Hide file tree
Showing 2 changed files with 242 additions and 0 deletions.
183 changes: 183 additions & 0 deletions R/summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@

my_getStrataList <- function(sex = FALSE, ageGroup = NULL, year = FALSE){

strata <- as.character()

if(!is.null(ageGroup)){
strata <- append(strata, "age_group")
}

if(sex){
strata <- append(strata, "sex")
}
if(year){
strata <- append(strata, "year")
}
return(strata)
}

checkFeasibility <- function(omopTable, tableName, conceptId){

if (omopgenerics::isTableEmpty(omopTable)){
cli::cli_warn(paste0(tableName, " omop table is empty."))
return(NULL)
}

if (is.na(conceptId)){
cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts."))
return(NULL)
}

y <- omopTable |>
dplyr::filter(!is.na(.data[[conceptId]]))

if (omopgenerics::isTableEmpty(y)){
cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts."))
return(NULL)
}
return(TRUE)
}
#' Summarise concept use in patient-level data
#'
#' @param cdm A cdm object
#' @param omopTableName A character vector of the names of the tables to
#' summarise in the cdm object.
#' @param countBy Either "record" for record-level counts or "person" for
#' person-level counts
#' @param year TRUE or FALSE. If TRUE code use will be summarised by year.
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @return A summarised_result object with results overall and, if specified, by
#' strata.
#' @export
summariseAllConceptCounts <- function(cdm,
omopTableName,
countBy = "record",
year = FALSE,
sex = FALSE,
ageGroup = NULL){

omopgenerics::validateCdmArgument(cdm)
checkCountBy(countBy)
omopgenerics::assertLogical(year, length = 1)
omopgenerics::assertLogical(sex, length = 1)
omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE)

ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]]

strata <- my_getStrataList(sex = sex, year = year, ageGroup = ageGroup)

stratification <- omopgenerics::combineStrata(strata)

result_tables <- purrr::map(omopTableName, function(table){



omopTable <- cdm[[table]] |>
dplyr::ungroup()


conceptId <- standardConcept(omopgenerics::tableName(omopTable))

if (is.null(checkFeasibility(omopTable, table, conceptId))){
return(NULL)
}


indexDate <- startDate(omopgenerics::tableName(omopTable))

x <- omopTable |>
dplyr::filter(!is.na(.data[[conceptId]])) |>
dplyr::left_join(
cdm$concept |> dplyr::select("concept_id", "concept_name"),
by = stats::setNames("concept_id", conceptId)) |>
PatientProfiles::addDemographicsQuery(age = FALSE,
ageGroup = ageGroup,
sex = sex,
indexDate = indexDate, priorObservation = FALSE, futureObservation = FALSE)
if (year){
x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]])))
}

level <- c(conceptId, "concept_name")

groupings <- c(list(level), purrr::map(stratification, ~ c(level, .x)))

result <- list()
if ("record" %in% countBy){

stratified_result <- x |>
dplyr::group_by(dplyr::across(dplyr::all_of(c(level,strata)))) |>
dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|>
dplyr::collect()


grouped_results <- purrr::map(groupings, \(g) {
stratified_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise("estimate_value" = as.integer(sum(.data$estimate_value, na.rm = TRUE)), .groups = "drop")

})

result_record <- purrr::reduce(grouped_results, dplyr::bind_rows)|>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|>
dplyr::mutate("estimate_name" = "record_count")
result<-dplyr::bind_rows(result,result_record)
}

if ("person" %in% countBy){

grouped_results <- purrr::map(groupings, \(g) {
x |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|>
dplyr::collect()
})

result_person <- purrr::reduce(grouped_results, dplyr::bind_rows) |>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall"))) |>
dplyr::mutate("estimate_name" = "person_count")
result<-dplyr::bind_rows(result,result_person)
}
result<- result |>
dplyr::mutate("omop_table" = table,
"variable_level" = as.character(.data[[conceptId]])) |>
dplyr::select(!c(conceptId))
return(result)
})
if (rlang::is_empty(purrr::compact(result_tables))){
return(omopgenerics::emptySummarisedResult())
}

sr <-purrr::compact(result_tables) |>
purrr::reduce(dplyr::union)|>
dplyr::mutate(
result_id = 1L,
cdm_name = omopgenerics::cdmName(cdm)
) |>
visOmopResults::uniteGroup(cols = "omop_table") |>
visOmopResults::uniteStrata(cols = strata) |>
visOmopResults::uniteAdditional() |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"estimate_type" = "integer"
) |>
dplyr::rename("variable_name" = "concept_name")
# |>
# dplyr::select(!c())


settings <- dplyr::tibble(
result_id = unique(sr$result_id),
package_name = "omopSketch",
package_version = as.character(utils::packageVersion("OmopSketch")),
result_type = "summarise_all_concept_counts"
)
sr <- sr |>
omopgenerics::newSummarisedResult(settings = settings)

return(sr)

}

59 changes: 59 additions & 0 deletions tests/testthat/test-summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
test_that("summariseAllConceptCount works", {
skip_on_cran()

cdm <- cdmEunomia()

expect_true(inherits(summariseAllConceptCounts(cdm, "drug_exposure"), "summarised_result"))
expect_warning(summariseAllConceptCounts(cdm, "observation_period"))
expect_no_error(x <- summariseAllConceptCounts(cdm, "visit_occurrence"))
expect_no_error(summariseAllConceptCounts(cdm, "condition_occurrence", countBy = c("record", "person")))
expect_no_error(summariseAllConceptCounts(cdm, "drug_exposure"))
expect_no_error(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "person"))
expect_warning(summariseAllConceptCounts(cdm, "device_exposure"))
expect_no_error(y <- summariseAllConceptCounts(cdm, "measurement"))
expect_no_error(summariseAllConceptCounts(cdm, "observation", year = TRUE))
expect_warning(summariseAllConceptCounts(cdm, "death"))

expect_no_error(all <- summariseAllConceptCounts(cdm, c("visit_occurrence", "measurement")))
expect_equal(all, dplyr::bind_rows(x, y))
expect_equal(summariseAllConceptCounts(cdm, "procedure_occurrence", countBy = "record"), summariseAllConceptCounts(cdm, "procedure_occurrence"))

expect_error(summariseAllConceptCounts(cdm, omopTableName = ""))
expect_error(summariseAllConceptCounts(cdm, omopTableName = "visit_occurrence", countBy = "dd"))

expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, ageGroup = list(c(0, 50), c(51, Inf))) |>
dplyr::distinct(.data$strata_level) |>
dplyr::tally() |>
dplyr::pull() == 9)

expect_true(summariseAllConceptCounts(cdm, "procedure_occurrence", ageGroup = list(c(0, 50))) |>
dplyr::distinct(.data$strata_level) |>
dplyr::tally() |>
dplyr::pull() == 3)

s <- summariseAllConceptCounts(cdm, "procedure_occurrence")
z <- summariseAllConceptCounts(cdm, "procedure_occurrence", sex = TRUE, year = TRUE, ageGroup = list(c(0, 50), c(51, Inf)))

x <- z |>
dplyr::filter(strata_level == "overall") |>
dplyr::select(variable_level, estimate_value)
s <- s |>
dplyr::select(variable_level, estimate_value)
expect_equal(x, s)

x <- z |>
dplyr::filter(strata_name == "age_group") |>
dplyr::group_by(variable_level) |>
dplyr::summarise(estimate_value = sum(as.numeric(estimate_value), na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(estimate_value = as.character(estimate_value))

p <- s |>
dplyr::select(variable_level, estimate_value)

expect_true(all.equal(
as.data.frame(x) |> dplyr::arrange(variable_level),
as.data.frame(p) |> dplyr::arrange(variable_level),
check.attributes = FALSE
))

})

0 comments on commit dcb0d61

Please sign in to comment.