diff --git a/DESCRIPTION b/DESCRIPTION index 06b942d8..45179775 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -62,6 +62,7 @@ Imports: clock, CohortConstructor (>= 0.3.1), dplyr, + lifecycle, omopgenerics (>= 0.3.1), PatientProfiles (>= 1.2.1), purrr, diff --git a/NAMESPACE b/NAMESPACE index 45674048..b6d795f1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,7 @@ export(bind) export(exportSummarisedResult) export(importSummarisedResult) export(mockOmopSketch) -export(plotConceptCounts) +export(plotConceptSetCounts) export(plotInObservation) export(plotObservationPeriod) export(plotRecordCount) @@ -12,6 +12,7 @@ export(settings) export(summariseAllConceptCounts) export(summariseClinicalRecords) export(summariseConceptCounts) +export(summariseConceptSetCounts) export(summariseInObservation) export(summariseMissingData) export(summariseObservationPeriod) diff --git a/R/plotConceptCounts.R b/R/plotConceptSetCounts.R similarity index 90% rename from R/plotConceptCounts.R rename to R/plotConceptSetCounts.R index baa20147..d9082935 100644 --- a/R/plotConceptCounts.R +++ b/R/plotConceptSetCounts.R @@ -1,6 +1,6 @@ -#' Plot the concept counts of a summariseConceptCounts output. +#' Plot the concept counts of a summariseConceptSetCounts output. #' -#' @param result A summarised_result object (output of summariseConceptCounts). +#' @param result A summarised_result object (output of summariseConceptSetCounts). #' @param facet Columns to face by. Formula format can be provided. See possible #' columns to face by with: `visOmopResults::tidyColumns()`. #' @param colour Columns to colour by. 
See possible columns to colour by with: @@ -14,8 +14,8 @@ #' cdm <- mockOmopSketch() #' #' result <- cdm |> -#' summariseConceptCounts( -#' conceptId = list( +#' summariseConceptSetCounts( +#' conceptSet= list( #' "Renal agenesis" = 194152, #' "Manic mood" = c(4226696, 4304866, 37110496, 40371897) #' ) @@ -23,11 +23,11 @@ #' #' result |> #' filter(variable_name == "Number subjects") |> -#' plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") +#' plotConceptSetCounts(facet = "codelist_name", colour = "standard_concept_name") #' #' PatientProfiles::mockDisconnect(cdm) #' } -plotConceptCounts <- function(result, +plotConceptSetCounts <- function(result, facet = NULL, colour = NULL){ @@ -39,10 +39,10 @@ plotConceptCounts <- function(result, # subset to results of interest result <- result |> - omopgenerics::filterSettings(.data$result_type == "summarise_concept_counts") + omopgenerics::filterSettings(.data$result_type == "summarise_concept_set_counts") if (nrow(result) == 0) { - cli::cli_abort(c("!" = "No records found with result_type == summarise_concept_counts")) + cli::cli_abort(c("!" 
= "No records found with result_type == summarise_concept_set_counts")) } # check only one estimate is contained diff --git a/R/summariseConceptCounts.R b/R/summariseConceptCounts.R index 1f26c4bc..fcadcaf2 100644 --- a/R/summariseConceptCounts.R +++ b/R/summariseConceptCounts.R @@ -39,322 +39,13 @@ summariseConceptCounts <- function(cdm, ageGroup = NULL, dateRange = NULL){ - omopgenerics::validateCdmArgument(cdm) - omopgenerics::assertList(conceptId, named = TRUE) - checkCountBy(countBy) - omopgenerics::assertChoice(countBy, choices = c("record", "person")) - countBy <- gsub("persons","subjects",paste0("number ",countBy,"s")) - x <- validateIntervals(interval) - interval <- x$interval - unitInterval <- x$unitInterval - omopgenerics::assertNumeric(unitInterval, length = 1, min = 1, na = TRUE) - omopgenerics::assertLogical(concept, length = 1) - omopgenerics::assertLogical(sex, length = 1) - ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] - dateRange <- validateStudyPeriod(cdm, dateRange) - # Get all concepts in concept table if conceptId is NULL - # if(is.null(conceptId)) { - # conceptId <- cdm$concept |> - # dplyr::select("concept_name", "concept_id") |> - # dplyr::collect() |> - # dplyr::group_by(.data$concept_name) |> - # dplyr::summarise(named_vec = list(.data$concept_id)) |> - # tibble::deframe() - # } - - codeUse <- list() - cli::cli_progress_bar("Getting use of codes", total = length(conceptId)) - for(i in 1:length(conceptId)) { - cli::cli_alert_info("Getting concept counts of {names(conceptId)[i]}") - codeUse[[i]] <- getCodeUse(conceptId[i], - cdm = cdm, - countBy = countBy, - concept = concept, - interval = interval, - unitInterval = unitInterval, - sex = sex, - ageGroup = ageGroup, - dateRange = dateRange) - Sys.sleep(i/length(conceptId)) - cli::cli_progress_update() - } - codeUse <- codeUse |> - dplyr::bind_rows() - cli::cli_progress_done() - - if(nrow(codeUse) > 0) { - codeUse <- codeUse %>% - dplyr::mutate( - 
result_id = as.integer(1), - cdm_name = omopgenerics::cdmName(cdm) - ) - } else { - codeUse <- omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_counts", study_period = dateRange)) - } - - codeUse <- codeUse %>% - omopgenerics::newSummarisedResult( -createSettings(result_type = "summarise_concept_counts", study_period = dateRange) - ) - return(codeUse) -} - -getCodeUse <- function(x, - cdm, - countBy, - concept, - interval, - unitInterval, - sex, - ageGroup, - dateRange, - call = parent.frame()){ - - tablePrefix <- omopgenerics::tmpPrefix() - - omopgenerics::assertNumeric(x[[1]], integerish = TRUE) - omopgenerics::assertList(x) - - # Create code list table - tableCodelist <- paste0(tablePrefix,"codelist") - cdm <- omopgenerics::insertTable(cdm = cdm, - name = tableCodelist, - table = dplyr::tibble(concept_id = x[[1]]), - overwrite = TRUE, - temporary = FALSE) - - cdm[[tableCodelist]] <- cdm[[tableCodelist]] %>% - dplyr::left_join( - cdm[["concept"]] %>% dplyr::select("concept_id", "domain_id"), - by = "concept_id" - ) - - # Create domains table - tableDomainsData <- paste0(tablePrefix,"domains_data") - cdm <- omopgenerics::insertTable(cdm = cdm, - name = tableDomainsData, - table = tables, - overwrite = TRUE, - temporary = FALSE) - - cdm[[tableCodelist]] <- cdm[[tableCodelist]] %>% - dplyr::mutate(domain_id = tolower(.data$domain_id)) |> - dplyr::left_join(cdm[[tableDomainsData]], - by = "domain_id") |> - dplyr::compute(name = tableCodelist, - temporary = FALSE, - overwrite = TRUE) - - # Create records table - intermediateTable <- paste0(tablePrefix,"intermediate_table") - records <- getRelevantRecords(cdm = cdm, - tableCodelist = tableCodelist, - intermediateTable = intermediateTable, - tablePrefix = tablePrefix) - if(is.null(records)){ - cc <- dplyr::tibble() - cli::cli_inform(c( - "i" = "No records found in the cdm for the concepts provided." 
- )) - return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_counts", study_period = dateRange))) - } - - if (!is.null(dateRange)) - { - records <- records |> - dplyr::filter( - as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2] - ) - if (is.null(warningEmptyStudyPeriod(records))){ - return(tibble::tibble()) - } - - } - records <- addStrataToOmopTable(records, "date", ageGroup, sex) - strata <- getStrataList(sex, ageGroup) - - - - if(interval != "overall"){ - intervalTibble <- getIntervalTibble(omopTable = records, - start_date_name = "date", - end_date_name = "date", - interval = interval, - unitInterval = unitInterval) - - cdm <- cdm |> omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = intervalTibble) - - records <- splitIncidenceBetweenIntervals(cdm, records, "date", tablePrefix) - - strata <- omopgenerics::combineStrata(c(unique(unlist(getStrataList(sex,ageGroup))), "interval_group")) - } - - if(!"number subjects" %in% c(countBy)){records <- records |> dplyr::select(-"person_id")} - if(concept){ - group <- list("standard_concept_id") - }else{ - group <- list() - records <- records |> - dplyr::mutate("standard_concept_name" = !!names(x)) - } - - cc <- records |> - PatientProfiles::summariseResult(strata = strata, - variable = "standard_concept_name", - group = group, - includeOverallGroup = TRUE, - includeOverallStrata = TRUE, - counts = TRUE, - estimates = as.character()) |> - suppressMessages() |> - dplyr::filter(.data$variable_name %in% .env$countBy) |> - dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> - dplyr::mutate(standard_concept_id = .data$group_level) |> - dplyr::mutate(group_name = "codelist_name") |> - dplyr::mutate(group_level = names(x)) |> - dplyr::mutate(cdm_name = omopgenerics::cdmName(cdm)) |> - dplyr::select(-c("additional_name", "additional_level")) |> - dplyr::left_join( - getConceptsInfo(records), - by = 
"standard_concept_id" - ) |> - dplyr::select(-"standard_concept_id") - - if(interval != "overall"){ - cc <- cc |> - omopgenerics::splitStrata() |> - dplyr::mutate("additional_level" = dplyr::if_else(.data$interval_group == "overall", .data$additional_level, paste0(.data$interval_group, " &&& ", .data$additional_level))) |> - dplyr::mutate("additional_name" = dplyr::if_else(.data$interval_group == "overall", .data$additional_name, paste0("time_interval &&& ", .data$additional_name))) |> - dplyr::mutate("additional_level" = gsub(" &&& overall$", "", .data$additional_level)) |> - dplyr::mutate("additional_name" = gsub(" &&& overall$", "", .data$additional_name)) |> - omopgenerics::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> - dplyr::select(-"interval_group") - } - CDMConnector::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) - - return(cc) -} - -getRelevantRecords <- function(cdm, - tableCodelist, - intermediateTable, - tablePrefix){ - - codes <- cdm[[tableCodelist]] |> dplyr::collect() - - tableName <- purrr::discard(unique(codes$table_name), is.na) - standardConceptIdName <- purrr::discard(unique(codes$standard_concept), is.na) - sourceConceptIdName <- purrr::discard(unique(codes$source_concept), is.na) - dateName <- purrr::discard(unique(codes$start_date), is.na) - - if(length(tableName)>0){ - codeRecords <- cdm[[tableName[[1]]]] - - if(is.null(codeRecords)){return(NULL)} - - tableCodes <- paste0(tablePrefix, "table_codes") - - cdm <- omopgenerics::insertTable(cdm = cdm, - name = tableCodes, - table = codes %>% - dplyr::filter(.data$table_name == !!tableName[[1]]) %>% - dplyr::select("concept_id", "domain_id"), - overwrite = TRUE, - temporary = FALSE) - - codeRecords <- codeRecords %>% - dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>% - dplyr::select(dplyr::all_of(c("person_id", - standardConceptIdName[[1]], - sourceConceptIdName[[1]], - "date"))) %>% - dplyr::rename("standard_concept_id" = 
.env$standardConceptIdName[[1]], - "source_concept_id" = .env$sourceConceptIdName[[1]]) %>% - dplyr::inner_join(cdm[[tableCodes]], - by = c("standard_concept_id"="concept_id")) %>% - filterInObservation(indexDate = "date") |> - dplyr::compute( - name = paste0(intermediateTable,"_grr"), - temporary = FALSE, - schema = attr(cdm, "write_schema"), - overwrite = TRUE - ) - - CDMConnector::dropTable(cdm = cdm, name = tableCodes) - cdm[[tableCodes]] <- NULL - - } else { - return(NULL) - } - - # get for any additional domains and union - if(length(tableName) > 1) { - for(i in 1:(length(tableName)-1)) { - workingRecords <- cdm[[tableName[[i+1]]]] - - workingRecords <- workingRecords %>% - dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>% - dplyr::mutate(year = clock::get_year(date)) %>% - dplyr::select(dplyr::all_of(c("person_id", - standardConceptIdName[[i+1]], - sourceConceptIdName[[i+1]], - "date", "year"))) %>% - dplyr::rename("standard_concept_id" = .env$standardConceptIdName[[i+1]], - "source_concept_id" = .env$sourceConceptIdName[[i+1]]) %>% - dplyr::inner_join(codes %>% - dplyr::filter(.data$table_name == tableName[[i+1]]) %>% - dplyr::select("concept_id", "domain_id"), - by = c("standard_concept_id"="concept_id"), - copy = TRUE) - - if(workingRecords %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") >0){ - codeRecords <- codeRecords %>% - dplyr::union_all(workingRecords) %>% - dplyr::compute( - name = paste0(intermediateTable,"_grr_i"), - temporary = FALSE, - schema = attr(cdm, "write_schema"), - overwrite = TRUE - ) - } - } - } - - if(codeRecords %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") >0){ - codeRecords <- codeRecords %>% - dplyr::left_join(cdm[["concept"]] %>% - dplyr::select("concept_id", "concept_name"), - by = c("standard_concept_id"="concept_id")) %>% - dplyr::rename("standard_concept_name"="concept_name") %>% - dplyr::left_join(cdm[["concept"]] %>% - dplyr::select("concept_id", "concept_name"), - by = 
c("source_concept_id"="concept_id")) %>% - dplyr::rename("source_concept_name"="concept_name") %>% - dplyr::mutate(source_concept_name = dplyr::if_else(is.na(.data$source_concept_name), - "NA", .data$source_concept_name)) %>% - dplyr::compute( - name = paste0(intermediateTable,"_grr_cr"), - temporary = FALSE, - schema = attr(cdm, "write_schema"), - overwrite = TRUE - ) - } - - return(codeRecords) -} - -getConceptsInfo <- function(records){ - records |> - dplyr::select("standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id") |> - dplyr::distinct() |> - dplyr::collect() |> - dplyr::mutate("additional_name" = "standard_concept_name &&& standard_concept_id &&& source_concept_name &&& source_concept_id &&& domain_id") |> - dplyr::mutate("additional_level" = paste0(.data$standard_concept_name, " &&& ",.data$standard_concept_id, " &&& ", .data$source_concept_name, " &&& ", .data$source_concept_id, " &&& ", .data$domain_id)) |> - dplyr::select("standard_concept_id","additional_name", "additional_level") |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> - dplyr::add_row( - "standard_concept_id" = "overall", - "additional_name" = "overall", - "additional_level" = "overall" - ) + lifecycle::deprecate_warn(when = "0.2.0", what = "summariseConceptCounts()", with = "summariseConceptSetCounts()") + return(summariseConceptSetCounts(cdm = cdm, + conceptSet = conceptId, + countBy = countBy, + concept = concept, + interval = interval, + sex = sex, + ageGroup = ageGroup, + dateRange = dateRange)) } diff --git a/R/summariseConceptSetCounts.R b/R/summariseConceptSetCounts.R new file mode 100644 index 00000000..71e09671 --- /dev/null +++ b/R/summariseConceptSetCounts.R @@ -0,0 +1,360 @@ + +#' Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted. +#' +#' @param cdm A cdm object +#' @param conceptSet List of concept IDs to summarise. 
+#' @param countBy Either "record" for record-level counts or "person" for +#' person-level counts +#' @param concept TRUE or FALSE. If TRUE code use will be summarised by concept. +#' @param interval Time interval to stratify by. It can either be "years", "quarters", "months" or "overall". +#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex. +#' @param ageGroup A list of ageGroup vectors of length two. Code use will be +#' thus summarised by age groups. +#' @param dateRange A list containing the minimum and the maximum dates +#' defining the time range within which the analysis is performed. +#' @return A summarised_result object with results overall and, if specified, by +#' strata. +#' @export +#' @examples +#' \donttest{ +#' library(OmopSketch) +#' +#' cdm <- mockOmopSketch() +#' +#' cs <- list(sumatriptan = c(35604883, 35604879, 35604880, 35604884)) +#' +#' results <- summariseConceptSetCounts(cdm, conceptSet = cs) +#' +#' results +#' +#' PatientProfiles::mockDisconnect(cdm) +#' +#' } +summariseConceptSetCounts <- function(cdm, + conceptSet, + countBy = c("record", "person"), + concept = TRUE, + interval = "overall", + sex = FALSE, + ageGroup = NULL, + dateRange = NULL){ + + omopgenerics::validateCdmArgument(cdm) + omopgenerics::assertList(conceptSet, named = TRUE) + checkCountBy(countBy) + omopgenerics::assertChoice(countBy, choices = c("record", "person")) + countBy <- gsub("persons","subjects",paste0("number ",countBy,"s")) + x <- validateIntervals(interval) + interval <- x$interval + unitInterval <- x$unitInterval + omopgenerics::assertNumeric(unitInterval, length = 1, min = 1, na = TRUE) + omopgenerics::assertLogical(concept, length = 1) + omopgenerics::assertLogical(sex, length = 1) + ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]] + dateRange <- validateStudyPeriod(cdm, dateRange) + # Get all concepts in concept table if conceptSet is NULL + # if(is.null(conceptSet)) { + # conceptSet <- 
cdm$concept |> + # dplyr::select("concept_name", "concept_id") |> + # dplyr::collect() |> + # dplyr::group_by(.data$concept_name) |> + # dplyr::summarise(named_vec = list(.data$concept_id)) |> + # tibble::deframe() + # } + + codeUse <- list() + cli::cli_progress_bar("Getting use of codes", total = length(conceptSet)) + for(i in 1:length(conceptSet)) { + cli::cli_alert_info("Getting concept counts of {names(conceptSet)[i]}") + codeUse[[i]] <- getCodeUse(conceptSet[i], + cdm = cdm, + countBy = countBy, + concept = concept, + interval = interval, + unitInterval = unitInterval, + sex = sex, + ageGroup = ageGroup, + dateRange = dateRange) + Sys.sleep(i/length(conceptSet)) + cli::cli_progress_update() + } + codeUse <- codeUse |> + dplyr::bind_rows() + cli::cli_progress_done() + + if(nrow(codeUse) > 0) { + codeUse <- codeUse %>% + dplyr::mutate( + result_id = as.integer(1), + cdm_name = omopgenerics::cdmName(cdm) + ) + } else { + codeUse <- omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_set_counts", study_period = dateRange)) + } + + codeUse <- codeUse %>% + omopgenerics::newSummarisedResult( + createSettings(result_type = "summarise_concept_set_counts", study_period = dateRange) + ) + return(codeUse) +} + +getCodeUse <- function(x, + cdm, + countBy, + concept, + interval, + unitInterval, + sex, + ageGroup, + dateRange, + call = parent.frame()){ + + tablePrefix <- omopgenerics::tmpPrefix() + + omopgenerics::assertNumeric(x[[1]], integerish = TRUE) + omopgenerics::assertList(x) + + # Create code list table + tableCodelist <- paste0(tablePrefix,"codelist") + cdm <- omopgenerics::insertTable(cdm = cdm, + name = tableCodelist, + table = dplyr::tibble(concept_id = x[[1]]), + overwrite = TRUE, + temporary = FALSE) + + cdm[[tableCodelist]] <- cdm[[tableCodelist]] %>% + dplyr::left_join( + cdm[["concept"]] %>% dplyr::select("concept_id", "domain_id"), + by = "concept_id" + ) + + # Create domains table + tableDomainsData <- 
paste0(tablePrefix,"domains_data") + cdm <- omopgenerics::insertTable(cdm = cdm, + name = tableDomainsData, + table = tables, + overwrite = TRUE, + temporary = FALSE) + + cdm[[tableCodelist]] <- cdm[[tableCodelist]] %>% + dplyr::mutate(domain_id = tolower(.data$domain_id)) |> + dplyr::left_join(cdm[[tableDomainsData]], + by = "domain_id") |> + dplyr::compute(name = tableCodelist, + temporary = FALSE, + overwrite = TRUE) + + # Create records table + intermediateTable <- paste0(tablePrefix,"intermediate_table") + records <- getRelevantRecords(cdm = cdm, + tableCodelist = tableCodelist, + intermediateTable = intermediateTable, + tablePrefix = tablePrefix) + if(is.null(records)){ + cc <- dplyr::tibble() + cli::cli_inform(c( + "i" = "No records found in the cdm for the concepts provided." + )) + return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_set_counts", study_period = dateRange))) + } + + if (!is.null(dateRange)) + { + records <- records |> + dplyr::filter( + as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2] + ) + if (is.null(warningEmptyStudyPeriod(records))){ + return(tibble::tibble()) + } + + } + records <- addStrataToOmopTable(records, "date", ageGroup, sex) + strata <- getStrataList(sex, ageGroup) + + + + if(interval != "overall"){ + intervalTibble <- getIntervalTibble(omopTable = records, + start_date_name = "date", + end_date_name = "date", + interval = interval, + unitInterval = unitInterval) + + cdm <- cdm |> omopgenerics::insertTable(name = paste0(tablePrefix,"interval"), table = intervalTibble) + + records <- splitIncidenceBetweenIntervals(cdm, records, "date", tablePrefix) + + strata <- omopgenerics::combineStrata(c(unique(unlist(getStrataList(sex,ageGroup))), "interval_group")) + } + + if(!"number subjects" %in% c(countBy)){records <- records |> dplyr::select(-"person_id")} + if(concept){ + group <- list("standard_concept_id") + }else{ + group <- list() + records <- records |> + 
dplyr::mutate("standard_concept_name" = !!names(x)) + } + + cc <- records |> + PatientProfiles::summariseResult(strata = strata, + variable = "standard_concept_name", + group = group, + includeOverallGroup = TRUE, + includeOverallStrata = TRUE, + counts = TRUE, + estimates = as.character()) |> + suppressMessages() |> + dplyr::filter(.data$variable_name %in% .env$countBy) |> + dplyr::mutate("variable_name" = stringr::str_to_sentence(.data$variable_name)) |> + dplyr::mutate(standard_concept_id = .data$group_level) |> + dplyr::mutate(group_name = "codelist_name") |> + dplyr::mutate(group_level = names(x)) |> + dplyr::mutate(cdm_name = omopgenerics::cdmName(cdm)) |> + dplyr::select(-c("additional_name", "additional_level")) |> + dplyr::left_join( + getConceptsInfo(records), + by = "standard_concept_id" + ) |> + dplyr::select(-"standard_concept_id") + + if(interval != "overall"){ + cc <- cc |> + omopgenerics::splitStrata() |> + dplyr::mutate("additional_level" = dplyr::if_else(.data$interval_group == "overall", .data$additional_level, paste0(.data$interval_group, " &&& ", .data$additional_level))) |> + dplyr::mutate("additional_name" = dplyr::if_else(.data$interval_group == "overall", .data$additional_name, paste0("time_interval &&& ", .data$additional_name))) |> + dplyr::mutate("additional_level" = gsub(" &&& overall$", "", .data$additional_level)) |> + dplyr::mutate("additional_name" = gsub(" &&& overall$", "", .data$additional_name)) |> + omopgenerics::uniteStrata(unique(unlist(strata))[unique(unlist(strata)) != "interval_group"]) |> + dplyr::select(-"interval_group") + } + CDMConnector::dropTable(cdm = cdm, name = dplyr::starts_with(tablePrefix)) + + return(cc) +} + +getRelevantRecords <- function(cdm, + tableCodelist, + intermediateTable, + tablePrefix){ + + codes <- cdm[[tableCodelist]] |> dplyr::collect() + + tableName <- purrr::discard(unique(codes$table_name), is.na) + standardconceptSetName <- purrr::discard(unique(codes$standard_concept), is.na) + 
sourceconceptSetName <- purrr::discard(unique(codes$source_concept), is.na) + dateName <- purrr::discard(unique(codes$start_date), is.na) + + if(length(tableName)>0){ + codeRecords <- cdm[[tableName[[1]]]] + + if(is.null(codeRecords)){return(NULL)} + + tableCodes <- paste0(tablePrefix, "table_codes") + + cdm <- omopgenerics::insertTable(cdm = cdm, + name = tableCodes, + table = codes %>% + dplyr::filter(.data$table_name == !!tableName[[1]]) %>% + dplyr::select("concept_id", "domain_id"), + overwrite = TRUE, + temporary = FALSE) + + codeRecords <- codeRecords %>% + dplyr::mutate(date = !!dplyr::sym(dateName[[1]])) %>% + dplyr::select(dplyr::all_of(c("person_id", + standardconceptSetName[[1]], + sourceconceptSetName[[1]], + "date"))) %>% + dplyr::rename("standard_concept_id" = .env$standardconceptSetName[[1]], + "source_concept_id" = .env$sourceconceptSetName[[1]]) %>% + dplyr::inner_join(cdm[[tableCodes]], + by = c("standard_concept_id"="concept_id")) %>% + filterInObservation(indexDate = "date") |> + dplyr::compute( + name = paste0(intermediateTable,"_grr"), + temporary = FALSE, + schema = attr(cdm, "write_schema"), + overwrite = TRUE + ) + + CDMConnector::dropTable(cdm = cdm, name = tableCodes) + cdm[[tableCodes]] <- NULL + + } else { + return(NULL) + } + + # get for any additional domains and union + if(length(tableName) > 1) { + for(i in 1:(length(tableName)-1)) { + workingRecords <- cdm[[tableName[[i+1]]]] + + workingRecords <- workingRecords %>% + dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>% + dplyr::mutate(year = clock::get_year(date)) %>% + dplyr::select(dplyr::all_of(c("person_id", + standardconceptSetName[[i+1]], + sourceconceptSetName[[i+1]], + "date", "year"))) %>% + dplyr::rename("standard_concept_id" = .env$standardconceptSetName[[i+1]], + "source_concept_id" = .env$sourceconceptSetName[[i+1]]) %>% + dplyr::inner_join(codes %>% + dplyr::filter(.data$table_name == tableName[[i+1]]) %>% + dplyr::select("concept_id", "domain_id"), + by = 
c("standard_concept_id"="concept_id"), + copy = TRUE) + + if(workingRecords %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") >0){ + codeRecords <- codeRecords %>% + dplyr::union_all(workingRecords) %>% + dplyr::compute( + name = paste0(intermediateTable,"_grr_i"), + temporary = FALSE, + schema = attr(cdm, "write_schema"), + overwrite = TRUE + ) + } + } + } + + if(codeRecords %>% utils::head(1) %>% dplyr::tally() %>% dplyr::pull("n") >0){ + codeRecords <- codeRecords %>% + dplyr::left_join(cdm[["concept"]] %>% + dplyr::select("concept_id", "concept_name"), + by = c("standard_concept_id"="concept_id")) %>% + dplyr::rename("standard_concept_name"="concept_name") %>% + dplyr::left_join(cdm[["concept"]] %>% + dplyr::select("concept_id", "concept_name"), + by = c("source_concept_id"="concept_id")) %>% + dplyr::rename("source_concept_name"="concept_name") %>% + dplyr::mutate(source_concept_name = dplyr::if_else(is.na(.data$source_concept_name), + "NA", .data$source_concept_name)) %>% + dplyr::compute( + name = paste0(intermediateTable,"_grr_cr"), + temporary = FALSE, + schema = attr(cdm, "write_schema"), + overwrite = TRUE + ) + } + + return(codeRecords) +} + +getConceptsInfo <- function(records){ + records |> + dplyr::select("standard_concept_name", "standard_concept_id", "source_concept_name", "source_concept_id", "domain_id") |> + dplyr::distinct() |> + dplyr::collect() |> + dplyr::mutate("additional_name" = "standard_concept_name &&& standard_concept_id &&& source_concept_name &&& source_concept_id &&& domain_id") |> + dplyr::mutate("additional_level" = paste0(.data$standard_concept_name, " &&& ",.data$standard_concept_id, " &&& ", .data$source_concept_name, " &&& ", .data$source_concept_id, " &&& ", .data$domain_id)) |> + dplyr::select("standard_concept_id","additional_name", "additional_level") |> + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> + dplyr::add_row( + "standard_concept_id" = "overall", + "additional_name" = "overall", + 
"additional_level" = "overall" + ) +} diff --git a/README.Rmd b/README.Rmd index 982ca857..807452a7 100644 --- a/README.Rmd +++ b/README.Rmd @@ -96,9 +96,9 @@ OmopSketch also provides functions to explore some of (or all) the concepts in t ```{r} acetaminophen <- c(1125315, 1127433, 1127078) -summariseConceptCounts(cdm, conceptId = list("acetaminophen" = acetaminophen)) |> +summariseConceptSetCounts(cdm, conceptSet = list("acetaminophen" = acetaminophen)) |> filter(estimate_name == "record_count") |> - plotConceptCounts() + plotConceptSetCounts() ``` ### Characterise the population diff --git a/README.md b/README.md index dbcca14f..e939a3f1 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ concepts in the dataset. ``` r acetaminophen <- c(1125315, 1127433, 1127078) -summariseConceptCounts(cdm, conceptId = list("acetaminophen" = acetaminophen)) |> +summariseConceptSetCounts(cdm, conceptSet = list("acetaminophen" = acetaminophen)) |> filter(estimate_name == "record_count") |> plotConceptCounts() #> ℹ Getting use of codes from acetaminophen diff --git a/_pkgdown.yml b/_pkgdown.yml index 6f67a4b6..cd91ef0d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -28,11 +28,15 @@ reference: - subtitle: Counts desc: Summarise concept code use in the OMOP Common Data Model - contents: - - summariseConceptCounts - - plotConceptCounts + - summariseConceptSetCounts + - plotConceptSetCounts - summariseAllConceptCounts - tableAllConceptCounts - subtitle: Mock Database desc: Create a mock database to test the OmopSketch package - contents: - mockOmopSketch +- subtitle: Deprecated functions + desc: These functions have been deprecated +- contents: + - summariseConceptCounts diff --git a/man/plotConceptCounts.Rd b/man/plotConceptSetCounts.Rd similarity index 62% rename from man/plotConceptCounts.Rd rename to man/plotConceptSetCounts.Rd index 2492a3d5..8a144f88 100644 --- a/man/plotConceptCounts.Rd +++ b/man/plotConceptSetCounts.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not 
edit by hand -% Please edit documentation in R/plotConceptCounts.R -\name{plotConceptCounts} -\alias{plotConceptCounts} -\title{Plot the concept counts of a summariseConceptCounts output.} +% Please edit documentation in R/plotConceptSetCounts.R +\name{plotConceptSetCounts} +\alias{plotConceptSetCounts} +\title{Plot the concept counts of a summariseConceptSetCounts output.} \usage{ -plotConceptCounts(result, facet = NULL, colour = NULL) +plotConceptSetCounts(result, facet = NULL, colour = NULL) } \arguments{ -\item{result}{A summarised_result object (output of summariseConceptCounts).} +\item{result}{A summarised_result object (output of summariseConceptSetCounts).} \item{facet}{Columns to face by. Formula format can be provided. See possible columns to face by with: \code{visOmopResults::tidyColumns()}.} @@ -19,7 +19,7 @@ columns to face by with: \code{visOmopResults::tidyColumns()}.} A ggplot2 object showing the concept counts. } \description{ -Plot the concept counts of a summariseConceptCounts output. +Plot the concept counts of a summariseConceptSetCounts output. 
} \examples{ \donttest{ @@ -28,8 +28,8 @@ library(dplyr) cdm <- mockOmopSketch() result <- cdm |> - summariseConceptCounts( - conceptId = list( + summariseConceptSetCounts( + conceptSet= list( "Renal agenesis" = 194152, "Manic mood" = c(4226696, 4304866, 37110496, 40371897) ) @@ -37,7 +37,7 @@ result <- cdm |> result |> filter(variable_name == "Number subjects") |> - plotConceptCounts(facet = "codelist_name", colour = "standard_concept_name") + plotConceptSetCounts(facet = "codelist_name", colour = "standard_concept_name") PatientProfiles::mockDisconnect(cdm) } diff --git a/man/summariseConceptSetCounts.Rd b/man/summariseConceptSetCounts.Rd new file mode 100644 index 00000000..b75e4a2b --- /dev/null +++ b/man/summariseConceptSetCounts.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summariseConceptSetCounts.R +\name{summariseConceptSetCounts} +\alias{summariseConceptSetCounts} +\title{Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted.} +\usage{ +summariseConceptSetCounts( + cdm, + conceptSet, + countBy = c("record", "person"), + concept = TRUE, + interval = "overall", + sex = FALSE, + ageGroup = NULL, + dateRange = NULL +) +} +\arguments{ +\item{cdm}{A cdm object} + +\item{conceptSet}{List of concept IDs to summarise.} + +\item{countBy}{Either "record" for record-level counts or "person" for +person-level counts} + +\item{concept}{TRUE or FALSE. If TRUE code use will be summarised by concept.} + +\item{interval}{Time interval to stratify by. It can either be "years", "quarters", "months" or "overall".} + +\item{sex}{TRUE or FALSE. If TRUE code use will be summarised by sex.} + +\item{ageGroup}{A list of ageGroup vectors of length two. 
Code use will be +thus summarised by age groups.} + +\item{dateRange}{A list containing the minimum and the maximum dates +defining the time range within which the analysis is performed.} +} +\value{ +A summarised_result object with results overall and, if specified, by +strata. +} +\description{ +Summarise concept counts in patient-level data. Only concepts recorded during observation period are counted. +} +\examples{ +\donttest{ +library(OmopSketch) + +cdm <- mockOmopSketch() + +cs <- list(sumatriptan = c(35604883, 35604879, 35604880, 35604884)) + +results <- summariseConceptSetCounts(cdm, conceptSet = cs) + +results + +PatientProfiles::mockDisconnect(cdm) + +} +} diff --git a/tests/testthat/test-summariseConceptCounts.R b/tests/testthat/test-summariseConceptSetCounts.R similarity index 86% rename from tests/testthat/test-summariseConceptCounts.R rename to tests/testthat/test-summariseConceptSetCounts.R index 9d268e5a..1425cdce 100644 --- a/tests/testthat/test-summariseConceptCounts.R +++ b/tests/testthat/test-summariseConceptSetCounts.R @@ -7,8 +7,8 @@ test_that("summarise code use - eunomia", { cs <- list(acetiminophen = acetiminophen, poliovirus_vaccine = poliovirus_vaccine) startNames <- CDMConnector::listSourceTables(cdm) - results <- summariseConceptCounts(cdm = cdm, - conceptId = cs, + results <- summariseConceptSetCounts(cdm = cdm, + conceptSet = cs, interval = "years", countBy = c("record", "person"), concept = TRUE, @@ -20,7 +20,7 @@ test_that("summarise code use - eunomia", { expect_true(length(setdiff(endNames, startNames)) == 0) #Check result type - checkResultType(results, "summarise_concept_counts") + checkResultType(results, "summarise_concept_set_counts") # min cell counts: expect_equal( @@ -170,8 +170,8 @@ test_that("summarise code use - eunomia", { dplyr::tally() %>% dplyr::pull("n")) - results1 <- summariseConceptCounts(cdm = cdm, - conceptId = cs, + results1 <- summariseConceptSetCounts(cdm = cdm, + conceptSet = cs, interval = "years", 
concept = FALSE, sex = TRUE, @@ -210,7 +210,7 @@ test_that("summarise code use - eunomia", { dplyr::pull("n")) expect_true(results1$group_level |> unique() |> length() == 2) - results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), + results <- summariseConceptSetCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "person", interval = "years", sex = FALSE, @@ -221,7 +221,7 @@ test_that("summarise code use - eunomia", { dplyr::filter(variable_name == "Number records")) == 0) - results <- summariseConceptCounts(list("acetiminophen" = acetiminophen), + results <- summariseConceptSetCounts(list("acetiminophen" = acetiminophen), cdm = cdm, countBy = "record", interval = "years", sex = FALSE, @@ -233,54 +233,54 @@ test_that("summarise code use - eunomia", { # domains covered # condition - expect_true(nrow(summariseConceptCounts(list(cs= c(4112343)), + expect_true(nrow(summariseConceptSetCounts(list(cs= c(4112343)), cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL))>1) # visit - expect_true(nrow(summariseConceptCounts(list(cs= c(9201)), + expect_true(nrow(summariseConceptSetCounts(list(cs= c(9201)), cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL))>1) # drug - expect_true(nrow(summariseConceptCounts(list(cs= c(40213160)), + expect_true(nrow(summariseConceptSetCounts(list(cs= c(40213160)), cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL))>1) # measurement - expect_true(nrow(summariseConceptCounts(list(cs= c(3006322)), + expect_true(nrow(summariseConceptSetCounts(list(cs= c(3006322)), cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL))>1) # procedure and condition - expect_true(nrow(summariseConceptCounts(list(cs= c(4107731,4112343)), + expect_true(nrow(summariseConceptSetCounts(list(cs= c(4107731,4112343)), cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL))>1) # no records - expect_message(results <- summariseConceptCounts(list(cs= c(999999)), + expect_message(results <- 
summariseConceptSetCounts(list(cs= c(999999)), cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL)) expect_true(nrow(results) == 0) - # conceptId NULL (but reduce the computational time by filtering concepts first) + # conceptSet NULL (but reduce the computational time by filtering concepts first) # cdm$concept <- cdm$concept |> # dplyr::filter(grepl("k", concept_name)) # - # skip("conceptId = NULL not supported yet") - # results <- summariseConceptCounts(cdm = cdm, + # skip("conceptSet = NULL not supported yet") + # results <- summariseConceptSetCounts(cdm = cdm, # year = FALSE, # sex = FALSE, # ageGroup = NULL) @@ -297,8 +297,8 @@ test_that("summarise code use - eunomia", { # expect_true(all(results_concepts %in% c("overall",concepts))) # check attributes - results <- summariseConceptCounts(cdm = cdm, - conceptId = cs, + results <- summariseConceptSetCounts(cdm = cdm, + conceptSet = cs, interval = "years", sex = TRUE, ageGroup = list(c(0,17), @@ -306,46 +306,46 @@ test_that("summarise code use - eunomia", { c(66, 100))) expect_true(omopgenerics::settings(results)$package_name == "OmopSketch") - expect_true(omopgenerics::settings(results)$result_type == "summarise_concept_counts") + expect_true(omopgenerics::settings(results)$result_type == "summarise_concept_set_counts") expect_true(omopgenerics::settings(results)$package_version == packageVersion("OmopSketch")) # expected errors# expected errors - expect_error(summariseConceptCounts("not a concept", + expect_error(summariseConceptSetCounts("not a concept", cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL)) - expect_error(summariseConceptCounts("123", + expect_error(summariseConceptSetCounts("123", cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL)) - expect_error(summariseConceptCounts(list("123"), # not named + expect_error(summariseConceptSetCounts(list("123"), # not named cdm = cdm, interval = "years", sex = FALSE, ageGroup = NULL)) - expect_error(summariseConceptCounts(list(a = 
123), + expect_error(summariseConceptSetCounts(list(a = 123), cdm = "not a cdm", interval = "years", sex = FALSE, ageGroup = NULL)) - expect_error(summariseConceptCounts(list(a = 123), + expect_error(summariseConceptSetCounts(list(a = 123), cdm = cdm, interval = "Maybe", sex = FALSE, ageGroup = NULL)) - expect_error(summariseConceptCounts(list(a = 123), + expect_error(summariseConceptSetCounts(list(a = 123), cdm = cdm, interval = "years", sex = "Maybe", ageGroup = NULL)) - expect_error(summariseConceptCounts(list(a = 123), + expect_error(summariseConceptSetCounts(list(a = 123), cdm = cdm, interval = "years", sex = FALSE, ageGroup = list(c(18,17)))) - expect_error(summariseConceptCounts(list(a = 123), + expect_error(summariseConceptSetCounts(list(a = 123), cdm = cdm, interval = "years", sex = FALSE, @@ -414,13 +414,13 @@ test_that("summarise code use - mock data", { cdm <- CDMConnector::copyCdmTo( con = connection(), cdm = cdm, schema = schema()) - conceptId <- list( + conceptSet <- list( "Arthritis" = c(17,3), "Musculoskeletal disorder" = c(1), "Osteoarthritis of hip" = c(5) ) - result <- summariseConceptCounts(cdm, conceptId) + result <- summariseConceptSetCounts(cdm, conceptSet) # Arthritis (codes 3 and 17), one record of 17 per ind and one record of 3 ind 1 expect_equal(result |> @@ -449,7 +449,7 @@ test_that("summarise code use - mock data", { dplyr::pull(estimate_value), c("2","2")) - result <- summariseConceptCounts(cdm, conceptId, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) + result <- summariseConceptSetCounts(cdm, conceptSet, ageGroup = list(c(0,2), c(3,150)), sex = TRUE) # Individuals belong to the same age group but to different sex groups # Arthritis (codes 3 and 17), one record of each per ind @@ -509,27 +509,27 @@ test_that("summarise code use - mock data", { PatientProfiles::mockDisconnect(cdm) }) -test_that("plot concept counts works", { +test_that("plot concept set counts works", { skip_on_cran() # Load mock database ---- cdm <- cdmEunomia() # 
summariseInObservationPlot plot ---- - x <- summariseConceptCounts(cdm, conceptId = list(codes = c(40213160))) - expect_error(plotConceptCounts(x)) + x <- summariseConceptSetCounts(cdm, conceptSet = list(codes = c(40213160))) + expect_error(plotConceptSetCounts(x)) x <- x |> dplyr::filter(variable_name == "Number records") - expect_no_error(plotConceptCounts(x)) - expect_true(inherits(plotConceptCounts(x), "ggplot")) + expect_no_error(plotConceptSetCounts(x)) + expect_true(inherits(plotConceptSetCounts(x), "ggplot")) - x <- summariseConceptCounts(cdm, - conceptId = list("polio" = c(40213160), + x <- summariseConceptSetCounts(cdm, + conceptSet = list("polio" = c(40213160), "acetaminophen" = c(1125315, 1127433, 40229134, 40231925, 40162522, 19133768, 1127078))) - expect_error(plotConceptCounts(x)) + expect_error(plotConceptSetCounts(x)) x <- x |> dplyr::filter(variable_name == "Number records") - expect_no_error(plotConceptCounts(x)) - expect_message(plotConceptCounts(x)) - expect_no_error(plotConceptCounts(x, facet = "codelist_name")) - expect_no_error(plotConceptCounts(x, colour = "codelist_name")) + expect_no_error(plotConceptSetCounts(x)) + expect_message(plotConceptSetCounts(x)) + expect_no_error(plotConceptSetCounts(x, facet = "codelist_name")) + expect_no_error(plotConceptSetCounts(x, colour = "codelist_name")) x <- x |> dplyr::filter(result_id == -1) expect_error(plotInObservation(x)) @@ -542,19 +542,19 @@ test_that("dateRange argument works", { # Load mock database ---- cdm <- cdmEunomia() - expect_no_error(summariseConceptCounts(cdm,conceptId = list("polio" = c(40213160)), dateRange = as.Date(c("1930-01-01", "2018-01-01")))) - expect_message(x<-summariseConceptCounts(cdm,conceptId = list("polio" = c(40213160)), dateRange = as.Date(c("1930-01-01", "2025-01-01")))) + expect_no_error(summariseConceptSetCounts(cdm,conceptSet = list("polio" = c(40213160)), dateRange = as.Date(c("1930-01-01", "2018-01-01")))) + 
expect_message(x<-summariseConceptSetCounts(cdm,conceptSet = list("polio" = c(40213160)), dateRange = as.Date(c("1930-01-01", "2025-01-01")))) observationRange <- cdm$observation_period |> dplyr::summarise(minobs = min(.data$observation_period_start_date, na.rm = TRUE), maxobs = max(.data$observation_period_end_date, na.rm = TRUE)) - expect_no_error(y<- summariseConceptCounts(cdm,conceptId = list("polio" = c(40213160)), dateRange = as.Date(c("1930-01-01", observationRange |>dplyr::pull("maxobs"))))) + expect_no_error(y<- summariseConceptSetCounts(cdm,conceptSet = list("polio" = c(40213160)), dateRange = as.Date(c("1930-01-01", observationRange |>dplyr::pull("maxobs"))))) expect_equal(x,y, ignore_attr = TRUE) expect_false(settings(x)$study_period_end==settings(y)$study_period_end) - expect_error(summariseConceptCounts(cdm,conceptId = list("polio" = c(40213160)), dateRange = as.Date(c("2015-01-01", "2014-01-01")))) - expect_warning(z<-summariseConceptCounts(cdm,conceptId = list("polio" = c(40213160)), dateRange = as.Date(c("2020-01-01", "2021-01-01")))) + expect_error(summariseConceptSetCounts(cdm,conceptSet = list("polio" = c(40213160)), dateRange = as.Date(c("2015-01-01", "2014-01-01")))) + expect_warning(z<-summariseConceptSetCounts(cdm,conceptSet = list("polio" = c(40213160)), dateRange = as.Date(c("2020-01-01", "2021-01-01")))) expect_equal(z, omopgenerics::emptySummarisedResult(), ignore_attr = TRUE) - expect_equal(summariseConceptCounts(cdm,conceptId = list("polio" = c(40213160)),dateRange = as.Date(c("1930-01-01",NA))), y, ignore_attr = TRUE) - checkResultType(z, "summarise_concept_counts") + expect_equal(summariseConceptSetCounts(cdm,conceptSet = list("polio" = c(40213160)),dateRange = as.Date(c("1930-01-01",NA))), y, ignore_attr = TRUE) + checkResultType(z, "summarise_concept_set_counts") expect_equal(colnames(settings(z)), colnames(settings(x))) PatientProfiles::mockDisconnect(cdm = cdm) }) diff --git a/vignettes/B-summarise_concept_counts.Rmd 
b/vignettes/B-summarise_concept_set_counts.Rmd similarity index 78% rename from vignettes/B-summarise_concept_counts.Rmd rename to vignettes/B-summarise_concept_set_counts.Rmd index fa5ae3e2..6df1d90c 100644 --- a/vignettes/B-summarise_concept_counts.Rmd +++ b/vignettes/B-summarise_concept_set_counts.Rmd @@ -27,7 +27,7 @@ if (!eunomia_is_available()) downloadEunomiaData() # Introduction -In this vignette, we will explore the *OmopSketch* functions designed to provide information about the number of counts of specific concepts. Specifically, there are two key functions that facilitate this, `summariseConceptCounts()` and `plotConceptCounts()`. The former one creates a summary statistics results with the number of counts per each concept, and the latter one creates a histogram plot. +In this vignette, we will explore the *OmopSketch* functions designed to provide information about the number of counts of specific concepts. Specifically, there are two key functions that facilitate this, `summariseConceptSetCounts()` and `plotConceptSetCounts()`. The former creates a summarised result with the number of counts for each concept, and the latter creates a histogram plot. ## Create a mock cdm @@ -73,19 +73,19 @@ sinusitis <- getCandidateCodes( ``` -Now we want to explore the occurrence of these concepts within the database. For that, we can use `summariseConceptCounts()` from OmopSketch: +Now we want to explore the occurrence of these concepts within the database. 
For that, we can use `summariseConceptSetCounts()` from OmopSketch: ```{r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("acetaminophen" = acetaminophen, +summariseConceptSetCounts(cdm, + conceptSet = list("acetaminophen" = acetaminophen, "sinusitis" = sinusitis)) |> select(group_level, variable_name, variable_level, estimate_name, estimate_value) |> glimpse() ``` By default, the function will provide information about either the number of records (`estimate_name == "record_count"`) for each concept_id or the number of people (`estimate_name == "person_count"`): ```{r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("acetaminophen" = acetaminophen, +summariseConceptSetCounts(cdm, + conceptSet = list("acetaminophen" = acetaminophen, "sinusitis" = sinusitis), countBy = c("record","person")) |> select(group_level, variable_name, estimate_name) |> @@ -97,8 +97,8 @@ summariseConceptCounts(cdm, However, we can specify which one is of interest using `countBy` argument: ```{r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("acetaminophen" = acetaminophen, +summariseConceptSetCounts(cdm, + conceptSet = list("acetaminophen" = acetaminophen, "sinusitis" = sinusitis), countBy = "record") |> select(group_level, variable_name, estimate_name) |> @@ -109,8 +109,8 @@ summariseConceptCounts(cdm, One can further stratify by year, sex or age group using the `year`, `sex`, and `ageGroup` arguments. ``` {r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("acetaminophen" = acetaminophen, +summariseConceptSetCounts(cdm, + conceptSet = list("acetaminophen" = acetaminophen, "sinusitis" = sinusitis), countBy = "person", interval = "years", @@ -123,36 +123,36 @@ summariseConceptCounts(cdm, Finally, we can visualise the concept counts using `plotRecordCounts()`. 
```{r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("sinusitis" = sinusitis), +summariseConceptSetCounts(cdm, + conceptSet = list("sinusitis" = sinusitis), countBy = "person") |> - plotConceptCounts() + plotConceptSetCounts() ``` Notice that either person counts or record counts can be plotted. If both have been included in the summarised result, you will have to filter to only include one variable at time: ```{r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("sinusitis" = sinusitis), +summariseConceptSetCounts(cdm, + conceptSet = list("sinusitis" = sinusitis), countBy = c("person","record")) |> filter(variable_name == "Number subjects") |> - plotConceptCounts() + plotConceptSetCounts() ``` Additionally, if results were stratified by year, sex or age group, we can further use `facet` or `colour` arguments to highlight the different results in the plot. To help us identify by which variables we can colour or facet by, we can use [visOmopResult](https://darwin-eu.github.io/visOmopResults/) package. ```{r, warning=FALSE} -summariseConceptCounts(cdm, - conceptId = list("sinusitis" = sinusitis), +summariseConceptSetCounts(cdm, + conceptSet = list("sinusitis" = sinusitis), countBy = c("person"), sex = TRUE, ageGroup = list("<=50" = c(0,50), ">50" = c(51, Inf))) |> visOmopResults::tidyColumns() -summariseConceptCounts(cdm, - conceptId = list("sinusitis" = sinusitis), +summariseConceptSetCounts(cdm, + conceptSet = list("sinusitis" = sinusitis), countBy = c("person"), sex = TRUE, ageGroup = list("<=50" = c(0,50), ">50" = c(51, Inf))) |> - plotConceptCounts(facet = "sex", colour = "age_group") + plotConceptSetCounts(facet = "sex", colour = "age_group") ```