Skip to content

Commit

Permalink
Merge pull request #273 from OHDSI/issue_71
Browse files Browse the repository at this point in the history
Refactor functions internally
  • Loading branch information
cecicampanile authored Dec 18, 2024
2 parents 5c31e8b + bfd24a2 commit c297a86
Show file tree
Hide file tree
Showing 42 changed files with 1,814 additions and 1,552 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
^docs$
^pkgdown$
^codecov\.yml$
^Extras
^extras$
^doc$
^Meta$
^cran-comments\.md$
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
.DS_Store
docs

extras/*
Eunomia/*

inst/doc
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: OmopSketch
Title: Characterise Tables of an OMOP Common Data Model Instance
Version: 0.1.1
Version: 0.2.0
Authors@R: c(
person(
"Marta", "Alcalde-Herraiz",
Expand Down Expand Up @@ -63,6 +63,7 @@ Imports:
clock,
CohortConstructor (>= 0.3.1),
dplyr,
glue,
lifecycle,
omopgenerics (>= 0.3.1),
PatientProfiles (>= 1.2.1),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ export(tableMissingData)
export(tableObservationPeriod)
export(tableOmopSnapshot)
importFrom(dplyr,"%>%")
importFrom(lifecycle,deprecated)
importFrom(omopgenerics,bind)
importFrom(omopgenerics,exportSummarisedResult)
importFrom(omopgenerics,importSummarisedResult)
importFrom(omopgenerics,settings)
importFrom(omopgenerics,suppress)
importFrom(rlang,"%||%")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
6 changes: 4 additions & 2 deletions R/OmopSketch-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
"_PACKAGE"

## usethis namespace: start
#' @importFrom dplyr %>%
#' @importFrom lifecycle deprecated
#' @importFrom rlang :=
#' @importFrom rlang .data
#' @importFrom rlang .env
#' @importFrom rlang :=
#' @importFrom dplyr %>%
#' @importFrom rlang %||%
## usethis namespace: end
NULL
33 changes: 0 additions & 33 deletions R/restrictStudyPeriod.R

This file was deleted.

255 changes: 98 additions & 157 deletions R/summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
@@ -1,46 +1,4 @@

my_getStrataList <- function(sex = FALSE, ageGroup = NULL, year = FALSE){

strata <- as.character()

if(!is.null(ageGroup)){
strata <- append(strata, "age_group")
}

if(sex){
strata <- append(strata, "sex")
}
if(year){
strata <- append(strata, "year")
}
return(strata)
}


checkFeasibility <- function(omopTable, tableName, conceptId){

if (omopgenerics::isTableEmpty(omopTable)){
cli::cli_warn(paste0(tableName, " omop table is empty."))
return(NULL)
}

if (is.na(conceptId)){
cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts."))
return(NULL)
}

y <- omopTable |>
dplyr::filter(!is.na(.data[[conceptId]]))

if (omopgenerics::isTableEmpty(y)){
cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts."))
return(NULL)
}
return(TRUE)
}



#' Summarise concept use in patient-level data
#'
#' @param cdm A cdm object
Expand All @@ -56,137 +14,120 @@ checkFeasibility <- function(omopTable, tableName, conceptId){
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#'
#' @return A summarised_result object with results overall and, if specified, by
#' strata.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(OmopSketch)
#' library(CDMConnector)
#' library(duckdb)
#'
#' requireEunomia()
#' con <- dbConnect(duckdb(), eunomiaDir())
#' cdm <- cdmFromCon(con = con, cdmSchema = "main", writeSchema = "main")
#'
#' summariseAllConceptCounts(cdm, "condition_occurrence")
#' }
#'
summariseAllConceptCounts <- function(cdm,
omopTableName,
countBy = "record",
year = FALSE,
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL){

omopgenerics::validateCdmArgument(cdm)
omopTableName,
countBy = "record",
year = FALSE,
sex = FALSE,
ageGroup = NULL,
sample = NULL,
dateRange = NULL) {
# initial checks
cdm <- omopgenerics::validateCdmArgument(cdm)
checkCountBy(countBy)
omopgenerics::assertLogical(year, length = 1)
omopgenerics::assertLogical(sex, length = 1)
omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE)

ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]]
omopgenerics::assertChoice(omopTableName, choices = omopgenerics::omopTables(), unique = TRUE)
ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup)
dateRange <- validateStudyPeriod(cdm, dateRange)
strata <- my_getStrataList(sex = sex, year = year, ageGroup = ageGroup)

stratification <- omopgenerics::combineStrata(strata)

result_tables <- purrr::map(omopTableName, function(table){




omopTable <- cdm[[table]] |>
dplyr::ungroup()


conceptId <- standardConcept(omopgenerics::tableName(omopTable))

if (is.null(checkFeasibility(omopTable, table, conceptId))){
return(NULL)
}

omopTable <- restrictStudyPeriod(omopTable, dateRange)
omopTable <- sampleOmopTable(omopTable, sample)

indexDate <- startDate(omopgenerics::tableName(omopTable))

x <- omopTable |>
dplyr::filter(!is.na(.data[[conceptId]])) |>
dplyr::left_join(
cdm$concept |> dplyr::select("concept_id", "concept_name"),
by = stats::setNames("concept_id", conceptId)) |>
PatientProfiles::addDemographicsQuery(age = FALSE,
ageGroup = ageGroup,
sex = sex,
indexDate = indexDate, priorObservation = FALSE, futureObservation = FALSE)
if (year){
x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]])))
omopgenerics::assertNumeric(sample, integerish = TRUE, min = 1, null = TRUE, length = 1)

# settings for the created results
set <- createSettings(result_type = "summarise_all_concept_counts", study_period = dateRange)

# get strata
strata <- omopgenerics::combineStrata(c(strataCols(sex = sex, ageGroup = ageGroup), "year"[year]))
concepts <- c("concept_id", "concept_name")
stratax <- c(list(concepts), purrr::map(strata, \(x) c(concepts, x)))

# how to count
counts <- c("records", "person_id")[c("record", "person") %in% countBy]

# summarise counts
resultTables <- purrr::map(omopTableName, \(table) {
# initial table
omopTable <- dplyr::ungroup(cdm[[table]])
conceptId <- omopgenerics::omopColumns(table = table, field = "standard_concept")
if (is.na(conceptId)) {
cli::cli_warn(c("!" = "No standard concept identified for {table}."))
return(NULL)
}

prefix <- omopgenerics::tmpPrefix()

# restrict study period
omopTable <- restrictStudyPeriod(omopTable, dateRange)
if (is.null(omopTable)) return(NULL)

# sample table
omopTable <- omopTable |>
sampleOmopTable(sample = sample, name = omopgenerics::uniqueTableName(prefix))

result <- omopTable |>
# add concept names
dplyr::rename(concept_id = dplyr::all_of(conceptId)) |>
dplyr::left_join(
cdm$concept |>
dplyr::select("concept_id", "concept_name"),
by = "concept_id"
) |>
# add demographics and year
addStratifications(
indexDate = omopgenerics::omopColumns(table = table, field = "start_date"),
sex = sex,
ageGroup = ageGroup,
interval = dplyr::if_else(year, "years", "overall"),
intervalName = "year",
name = omopgenerics::uniqueTableName(prefix)
) |>
# summarise results
summariseCountsInternal(stratax, counts) |>
dplyr::mutate(omop_table = .env$table)

omopgenerics::dropSourceTable(cdm = cdm, name = dplyr::starts_with(prefix))

return(result)
}) |>
purrr::compact()

if (length(resultTables) == 0) {
return(omopgenerics::emptySummarisedResult(settings = set))
}


level <- c(conceptId, "concept_name")

groupings <- c(list(level), purrr::map(stratification, ~ c(level, .x)))

result <- list()
if ("record" %in% countBy){

stratified_result <- x |>
dplyr::group_by(dplyr::across(dplyr::all_of(c(level,strata)))) |>
dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|>
dplyr::collect()


grouped_results <- purrr::map(groupings, \(g) {
stratified_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise("estimate_value" = as.integer(sum(.data$estimate_value, na.rm = TRUE)), .groups = "drop")

})

result_record <- purrr::reduce(grouped_results, dplyr::bind_rows)|>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|>
dplyr::mutate("estimate_name" = "record_count")
result<-dplyr::bind_rows(result,result_record)
}

if ("person" %in% countBy){

grouped_results <- purrr::map(groupings, \(g) {
x |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|>
dplyr::collect()
})

result_person <- purrr::reduce(grouped_results, dplyr::bind_rows) |>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall"))) |>
dplyr::mutate("estimate_name" = "person_count")
result<-dplyr::bind_rows(result,result_person)
}
result<- result |>
dplyr::mutate("omop_table" = table,
"variable_level" = as.character(.data[[conceptId]])) |>

dplyr::select(-dplyr::all_of(conceptId))
return(result)
})
if (rlang::is_empty(purrr::compact(result_tables))){
return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_all_concept_counts", study_period = dateRange)))
}

sr <-purrr::compact(result_tables) |>
purrr::reduce(dplyr::union)|>
resultTables |>
dplyr::bind_rows() |>
dplyr::mutate(
result_id = 1L,
cdm_name = omopgenerics::cdmName(cdm)
) |>
omopgenerics::uniteGroup(cols = "omop_table") |>
omopgenerics::uniteStrata(cols = strata) |>
omopgenerics::uniteStrata(cols = unique(unlist(strata)) %||% character()) |>
omopgenerics::uniteAdditional() |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"estimate_type" = "integer"
estimate_value = as.character(.data$estimate_value),
estimate_type = "integer",
variable_level = as.character(.data$concept_id)
) |>
dplyr::rename("variable_name" = "concept_name")
# |>
# dplyr::select(!c())


sr <- sr |>
omopgenerics::newSummarisedResult(settings = createSettings(result_type = "summarise_all_concept_counts", study_period = dateRange))

return(sr)

dplyr::rename("variable_name" = "concept_name") |>
dplyr::select(!"concept_id") |>
omopgenerics::newSummarisedResult(settings = set)
}

Loading

0 comments on commit c297a86

Please sign in to comment.