Skip to content

Commit

Permalink
Merge pull request #276 from OHDSI/sampling_option
Browse files Browse the repository at this point in the history
Sampling option
  • Loading branch information
catalamarti authored Dec 17, 2024
2 parents 28150e8 + e3a30f8 commit 5c31e8b
Show file tree
Hide file tree
Showing 13 changed files with 139 additions and 22 deletions.
5 changes: 4 additions & 1 deletion R/summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ checkFeasibility <- function(omopTable, tableName, conceptId){
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @param sample An integer to sample the tables to only that number of records.
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @return A summarised_result object with results overall and, if specified, by
Expand All @@ -63,6 +65,7 @@ summariseAllConceptCounts <- function(cdm,
year = FALSE,
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL){

omopgenerics::validateCdmArgument(cdm)
Expand Down Expand Up @@ -93,7 +96,7 @@ summariseAllConceptCounts <- function(cdm,
}

omopTable <- restrictStudyPeriod(omopTable, dateRange)

omopTable <- sampleOmopTable(omopTable, sample)

indexDate <- startDate(omopgenerics::tableName(omopTable))

Expand Down
7 changes: 6 additions & 1 deletion R/summariseClinicalRecords.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#' @param ageGroup A list of age groups to stratify results by.
#' @param sex Boolean variable. Whether to stratify by sex (TRUE) or not
#' (FALSE).
#' @param sample An integer to sample the tables to only that number of records.
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @return A summarised_result object.
Expand Down Expand Up @@ -55,6 +57,7 @@ summariseClinicalRecords <- function(cdm,
typeConcept = TRUE,
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL) {
# Initial checks ----
omopgenerics::validateCdmArgument(cdm)
Expand Down Expand Up @@ -93,6 +96,7 @@ summariseClinicalRecords <- function(cdm,
typeConcept = typeConcept,
sex = sex,
ageGroup = ageGroup,
sample = sample,
dateRange = dateRange
)
}) |>
Expand All @@ -112,6 +116,7 @@ summariseClinicalRecord <- function(omopTableName,
typeConcept,
sex,
ageGroup,
sample,
dateRange,
call = parent.frame(3)) {

Expand All @@ -124,8 +129,8 @@ summariseClinicalRecord <- function(omopTableName,

omopTable <- cdm[[omopTableName]] |>
dplyr::ungroup()

omopTable <- restrictStudyPeriod(omopTable, dateRange)
omopTable <- sampleOmopTable(omopTable, sample)
if(omopgenerics::isTableEmpty(omopTable)) {
return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_clinical_records", study_period = dateRange)))
}
Expand Down
46 changes: 29 additions & 17 deletions R/summariseConceptSetCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @param sample An integer to sample the tables in the cdm object to only that number of records.
#' If NULL no sample is done.
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @return A summarised_result object with results overall and, if specified, by
Expand Down Expand Up @@ -37,6 +39,7 @@ summariseConceptSetCounts <- function(cdm,
interval = "overall",
sex = FALSE,
ageGroup = NULL,
sample = 1000000,
dateRange = NULL){

omopgenerics::validateCdmArgument(cdm)
Expand Down Expand Up @@ -74,6 +77,7 @@ summariseConceptSetCounts <- function(cdm,
unitInterval = unitInterval,
sex = sex,
ageGroup = ageGroup,
sample = sample,
dateRange = dateRange)
Sys.sleep(i/length(conceptSet))
cli::cli_progress_update()
Expand Down Expand Up @@ -107,6 +111,7 @@ getCodeUse <- function(x,
unitInterval,
sex,
ageGroup,
sample,
dateRange,
call = parent.frame()){

Expand Down Expand Up @@ -150,26 +155,27 @@ getCodeUse <- function(x,
records <- getRelevantRecords(cdm = cdm,
tableCodelist = tableCodelist,
intermediateTable = intermediateTable,
tablePrefix = tablePrefix)
if(is.null(records)){
tablePrefix = tablePrefix,
sample = sample, dateRange = dateRange)

if(is.null(records) || omopgenerics::isTableEmpty(records)){
cc <- dplyr::tibble()
cli::cli_inform(c(
"i" = "No records found in the cdm for the concepts provided."
))
return(omopgenerics::emptySummarisedResult(settings = createSettings(result_type = "summarise_concept_set_counts", study_period = dateRange)))
}

if (!is.null(dateRange))
{
records <- records |>
dplyr::filter(
as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2]
)
if (is.null(warningEmptyStudyPeriod(records))){
return(tibble::tibble())
}

}
# if (!is.null(dateRange))
# {
# records <- records |>
# dplyr::filter(
# as.Date(date) >= !!dateRange[1]& as.Date(date) <= !!dateRange[2]
# )
# if (is.null(warningEmptyStudyPeriod(records))){
# return(tibble::tibble())
# }
# }
records <- addStrataToOmopTable(records, "date", ageGroup, sex)
strata <- getStrataList(sex, ageGroup)

Expand Down Expand Up @@ -238,7 +244,8 @@ getCodeUse <- function(x,
getRelevantRecords <- function(cdm,
tableCodelist,
intermediateTable,
tablePrefix){
tablePrefix,
sample, dateRange){

codes <- cdm[[tableCodelist]] |> dplyr::collect()

Expand All @@ -248,9 +255,11 @@ getRelevantRecords <- function(cdm,
dateName <- purrr::discard(unique(codes$start_date), is.na)

if(length(tableName)>0){
codeRecords <- cdm[[tableName[[1]]]]
codeRecords <- cdm[[tableName[[1]]]]|>
restrictStudyPeriod(dateRange)|>
sampleOmopTable(sample)

if(is.null(codeRecords)){return(NULL)}
if(is.null(codeRecords) || omopgenerics::isTableEmpty(codeRecords)){return(NULL)}

tableCodes <- paste0(tablePrefix, "table_codes")

Expand Down Expand Up @@ -290,7 +299,10 @@ getRelevantRecords <- function(cdm,
# get for any additional domains and union
if(length(tableName) > 1) {
for(i in 1:(length(tableName)-1)) {
workingRecords <- cdm[[tableName[[i+1]]]]
workingRecords <- cdm[[tableName[[i+1]]]] |>
restrictStudyPeriod(dateRange)|>
sampleOmopTable(sample)
if(is.null(workingRecords) || omopgenerics::isTableEmpty(workingRecords)){return(NULL)}

workingRecords <- workingRecords %>%
dplyr::mutate(date = !!dplyr::sym(dateName[[i+1]])) %>%
Expand Down
15 changes: 12 additions & 3 deletions R/summariseRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#' @param sex Whether to stratify by sex (TRUE) or not (FALSE).
#' @param dateRange A list containing the minimum and the maximum dates
#' defining the time range within which the analysis is performed.
#' @param sample An integer to sample the tables to only that number of records.
#' If NULL no sample is done.
#' @return A summarised_result object.
#' @export
#' @examples
Expand All @@ -35,7 +37,9 @@ summariseRecordCount <- function(cdm,
interval = "overall",
ageGroup = NULL,
sex = FALSE,
dateRange = NULL) {
sample = 1000000,
dateRange = NULL
) {

# Initial checks ----
omopgenerics::validateCdmArgument(cdm)
Expand All @@ -48,6 +52,7 @@ summariseRecordCount <- function(cdm,
omopgenerics::assertLogical(sex, length = 1)
dateRange <- validateStudyPeriod(cdm, dateRange)


result <- purrr::map(omopTableName,
function(x) {
omopgenerics::assertClass(cdm[[x]], "omop_table", call = parent.frame())
Expand All @@ -66,7 +71,9 @@ summariseRecordCount <- function(cdm,
original_interval,
ageGroup = ageGroup,
sex = sex,
dateRange = dateRange)
sample = sample,
dateRange = dateRange
)
}
) |>
dplyr::bind_rows()
Expand All @@ -76,10 +83,12 @@ summariseRecordCount <- function(cdm,

#' @noRd
summariseRecordCountInternal <- function(omopTableName, cdm, interval, unitInterval,
original_interval, ageGroup, sex, dateRange) {
original_interval, ageGroup, sex, sample, dateRange) {

prefix <- omopgenerics::tmpPrefix()
omopTable <- cdm[[omopTableName]] |> dplyr::ungroup()
omopTable <- restrictStudyPeriod(omopTable, dateRange)
omopTable <- sampleOmopTable(omopTable, sample)

# Create initial variables ----

Expand Down
13 changes: 13 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,17 @@ createSettings <- function(result_type, result_id = 1L, package_name = "OmopSket
return(settings)
}

sampleOmopTable <- function(omopTable, sample){
sampling <- !is.null(sample) & !is.infinite(sample)

if (sampling & omopTable |> dplyr::tally() |> dplyr::pull() <= sample) {
sampling <- FALSE
}

if (sampling){
omopTable <- omopTable |>
dplyr::slice_sample(n = sample)
}
return(omopTable)
}

4 changes: 4 additions & 0 deletions man/summariseAllConceptCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/summariseClinicalRecords.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/summariseConceptSetCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/summariseRecordCount.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ test_that("dateRange argument works", {
expect_equal(colnames(settings(y)), colnames(settings(x)))
PatientProfiles::mockDisconnect(cdm = cdm)
})
test_that("sample argument works", {
skip_on_cran()
# Load mock database ----
cdm <- cdmEunomia()

expect_no_error(x<-summariseAllConceptCounts(cdm,"drug_exposure", sample = 50))
expect_no_error(y<-summariseAllConceptCounts(cdm,"drug_exposure"))
n <- cdm$drug_exposure |>
dplyr::tally()|>
dplyr::pull(n)
expect_no_error(z<-summariseAllConceptCounts(cdm,"drug_exposure",sample = n))
expect_equal(y,z)
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("tableAllConceptCounts() works", {
skip_on_cran()
Expand Down
15 changes: 15 additions & 0 deletions tests/testthat/test-summariseClinicalRecords.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,21 @@ test_that("dateRange argument works", {

})

test_that("sample argument works", {
skip_on_cran()
# Load mock database ----
cdm <- cdmEunomia()

expect_no_error(x<-summariseClinicalRecords(cdm,"drug_exposure", sample = 50))
expect_no_error(y<-summariseClinicalRecords(cdm,"drug_exposure"))
n <- cdm$drug_exposure |>
dplyr::tally()|>
dplyr::pull(n)
expect_no_error(z<-summariseClinicalRecords(cdm,"drug_exposure",sample = n))
expect_equal(y,z)
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("tableClinicalRecords() works", {
skip_on_cran()
# Load mock database ----
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-summariseConceptSetCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,22 @@ test_that("dateRange argument works", {
expect_equal(colnames(settings(z)), colnames(settings(x)))
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("sample argument works", {
skip_on_cran()
# Load mock database ----
cdm <- cdmEunomia()

expect_no_error(d<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)), sample = 50))
expect_no_error(y<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260))))
n <- cdm$drug_exposure |>
dplyr::tally()|>
dplyr::pull(n)
expect_no_error(z<-summariseConceptSetCounts(cdm,conceptSet = list("zoster vax" = c(40213260)),sample = n))
expect_equal(y,z)
PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("interval argument works", {
skip_on_cran()
# Load mock database ----
Expand Down
Loading

0 comments on commit 5c31e8b

Please sign in to comment.