Skip to content

Commit 8a6068d

Browse files
committed
Completely different approach to event-dep-exposure diagnostic
1 parent 49b781b commit 8a6068d

File tree

3 files changed

+128
-134
lines changed

3 files changed

+128
-134
lines changed

R/Diagnostics.R

+114-129
Original file line numberDiff line numberDiff line change
@@ -362,99 +362,6 @@ computeEventDependentObservationP <- function(sccsModel) {
362362
}
363363

364364

365-
computeExposureDaysToEvent <- function(studyPopulation, sccsData, exposureEraId) {
366-
# This number of days before exposure start are assumed to be dealt with and are removed from
367-
# both numerator (exposure days) and denominator (observation days):
368-
preExposureDays <- 60 + 1
369-
370-
cases <- studyPopulation$cases |>
371-
select("caseId", "startDay", "endDay")
372-
373-
# Keep only exposures that overlap with the observation periods of the study population (also
374-
# truncate exposures to the observation period):
375-
exposures <- sccsData$eras |>
376-
filter(.data$eraId == exposureEraId & .data$eraType == "rx") |>
377-
inner_join(cases,
378-
by = join_by("caseId", "eraEndDay" >= "startDay", "eraStartDay" < "endDay"),
379-
copy = TRUE) |>
380-
collect() |>
381-
mutate(eraStartDay = pmax(eraStartDay, startDay),
382-
eraEndDay = pmin(eraEndDay, endDay))
383-
384-
if (nrow(exposures) == 0) {
385-
warning("No exposures found with era ID ", exposureEraId)
386-
return(NULL)
387-
}
388-
firstOutcomes <- studyPopulation$outcomes |>
389-
group_by(.data$caseId) |>
390-
filter(row_number(.data$outcomeDay) == 1)
391-
392-
# Merge overlapping exposures if needed:
393-
# exposures <- exposures |>
394-
# arrange(caseId, eraStartDay) |>
395-
# group_by(caseId) |>
396-
# mutate(newGroup = cumsum(lag(eraEndDay, default = first(eraEndDay)) < eraStartDay)) |>
397-
# group_by(caseId, newGroup) |>
398-
# summarise(
399-
# eraStartDay = min(eraStartDay),
400-
# eraEndDay = max(eraEndDay),
401-
# .groups = 'drop'
402-
# ) |>
403-
# select(caseId, eraStartDay, eraEndDay)
404-
405-
# Ensure at least <preExposureDays> before each exposure start, by moving end day back:
406-
truncatedExposures <- exposures |>
407-
arrange(caseId, eraStartDay) |>
408-
group_by(caseId) |>
409-
mutate(
410-
eraEndDay = ifelse(lead(eraStartDay, default = Inf) - eraEndDay < preExposureDays, lead(eraStartDay) - preExposureDays, eraEndDay)
411-
) |>
412-
filter(eraEndDay > eraStartDay) |>
413-
select(caseId, eraStartDay, eraEndDay)
414-
415-
exposureDeltas <- truncatedExposures |>
416-
inner_join(firstOutcomes, by = join_by("caseId")) |>
417-
mutate(deltaExposureStart = .data$eraStartDay - .data$outcomeDay,
418-
deltaExposureEnd = .data$eraEndDay - .data$outcomeDay) |>
419-
select("caseId", "deltaExposureStart", "deltaExposureEnd")
420-
421-
# Remove pre-exposure time from observation periods:
422-
joined <- studyPopulation$cases |>
423-
select(caseId, startDay, endDay) |>
424-
# left_join(truncatedExposures, by = "caseId") |>
425-
left_join(exposures |>
426-
select("caseId", "eraStartDay", "eraEndDay"),
427-
by = "caseId") |>
428-
arrange(caseId, eraStartDay)
429-
truncatedObservationPeriods <- joined |>
430-
group_by(caseId) |>
431-
mutate(
432-
periodStart = lag(eraStartDay, default = first(startDay)),
433-
periodEnd = pmin(eraStartDay - preExposureDays, endDay),
434-
lastPeriodStart = eraStartDay,
435-
lastPeriodEnd = endDay
436-
) |>
437-
filter(periodStart <= periodEnd) |>
438-
select(caseId, start = periodStart, end = periodEnd) |>
439-
bind_rows(
440-
joined |>
441-
group_by(caseId) |>
442-
slice(n()) |>
443-
transmute(caseId, start = if_else(is.na(eraStartDay), startDay, eraStartDay), end = endDay) |>
444-
filter(start <= end)
445-
) |>
446-
arrange(caseId, start) |>
447-
ungroup()
448-
449-
observationPeriodDeltas <- truncatedObservationPeriods |>
450-
inner_join(firstOutcomes, by = join_by("caseId")) |>
451-
mutate(deltaStart = .data$start - .data$outcomeDay,
452-
deltaEnd = .data$end - .data$outcomeDay) |>
453-
select("caseId", "deltaStart", "deltaEnd")
454-
455-
return(list(exposureDeltas = exposureDeltas, observationPeriodDeltas = observationPeriodDeltas))
456-
}
457-
458365
#' Compute p for whether exposure probability changed following the outcome
459366
#'
460367
#' @param exposureEraId The exposure to create the era data for. If not specified it is
@@ -463,12 +370,19 @@ computeExposureDaysToEvent <- function(studyPopulation, sccsData, exposureEraId)
463370
#' @template StudyPopulation
464371
#' @template SccsData
465372
#' @param bounds Bounds for the null of no change in the exposure rate.
373+
#' @param ignoreExposureStarts Ignore exposure starts when computing the diagnostic. This makes the
374+
#' diagnostic robust against the outcome temporarily preventing exposure
375+
#' starting, which should be dealt with by the pre-exposure window.
466376
#'
467377
#' @return
468378
#' The p-value
469379
#'
470380
#' @export
471-
computeExposureChangeP <- function(sccsData, studyPopulation, exposureEraId = NULL, bounds = log(c(0.5, 2))) {
381+
computeExposureChangeP <- function(sccsData,
382+
studyPopulation,
383+
exposureEraId = NULL,
384+
bounds = log(c(0.5, 2)),
385+
ignoreExposureStarts = TRUE) {
472386
errorMessages <- checkmate::makeAssertCollection()
473387
checkmate::assertClass(sccsData, "SccsData", add = errorMessages)
474388
checkmate::assertList(studyPopulation, min.len = 1, add = errorMessages)
@@ -481,60 +395,131 @@ computeExposureChangeP <- function(sccsData, studyPopulation, exposureEraId = NU
481395
stop("No exposure ID specified, but multiple exposures found")
482396
}
483397
}
484-
data <- computeExposureDaysToEvent(studyPopulation = studyPopulation,
485-
sccsData = sccsData,
486-
exposureEraId = exposureEraId)
487-
if (is.null(data)) {
488-
return(NA)
489-
}
490-
periods <- dplyr::tibble(afterOutcome = c(0,1),
491-
start = c(-30, 0),
492-
end = c(-1, 30))
493-
494-
exposure <- periods |>
495-
cross_join(data$exposureDeltas) |>
496-
mutate(daysExposure = pmax(0, pmin(end, deltaExposureEnd) - pmax(start, deltaExposureStart) + 1)) |>
497-
group_by(caseId, afterOutcome) |>
498-
summarise(daysExposure = sum(daysExposure), .groups = "drop") |>
499-
select(caseId, afterOutcome, daysExposure)
500-
501-
observation <- periods |>
502-
cross_join(data$observationPeriodDeltas) |>
503-
mutate(daysObserved = pmax(0, pmin(end, deltaEnd) - pmax(start, deltaStart) + 1)) |>
504-
group_by(caseId, afterOutcome) |>
505-
summarise(daysObserved = sum(daysObserved), .groups = "drop") |>
506-
select(caseId, afterOutcome, daysObserved)
507-
508-
casesWithExposure <- exposure |>
509-
distinct(caseId) |>
510-
pull()
511-
512-
poissonData <- observation |>
513-
filter(caseId %in% casesWithExposure & daysObserved > 0) |>
514-
left_join(exposure, by = join_by(caseId, afterOutcome)) |>
398+
cases <- studyPopulation$cases |>
399+
select("caseId", "startDay", "endDay")
400+
401+
# Keep only exposures that overlap with the observation periods of the study population (also
402+
# truncate exposures to the observation period):
403+
exposures <- sccsData$eras |>
404+
filter(.data$eraId == exposureEraId & .data$eraType == "rx") |>
405+
inner_join(cases,
406+
by = join_by("caseId", "eraEndDay" >= "startDay", "eraStartDay" < "endDay"),
407+
copy = TRUE) |>
408+
collect() |>
409+
mutate(eraStartDay = pmax(eraStartDay, startDay),
410+
eraEndDay = pmin(eraEndDay, endDay))
411+
412+
exposures <- exposures |>
413+
arrange(caseId, eraStartDay) |>
414+
group_by(caseId) |>
415+
mutate(newGroup = cumsum(lag(eraEndDay, default = first(eraEndDay)) < eraStartDay)) |>
416+
group_by(caseId, newGroup) |>
417+
summarise(
418+
eraStartDay = min(eraStartDay),
419+
eraEndDay = max(eraEndDay),
420+
.groups = 'drop'
421+
) |>
422+
select(caseId, eraStartDay, eraEndDay)
423+
424+
firstOutcomes <- studyPopulation$outcomes |>
425+
group_by(.data$caseId) |>
426+
filter(row_number(.data$outcomeDay) == 1)
427+
428+
# Compute exposure days before and after outcome, after removing exposures starting in the
429+
# respective windows.
430+
joined <- exposures |>
431+
inner_join(firstOutcomes, by = join_by("caseId")) |>
432+
mutate(deltaExposureStart = .data$eraStartDay - .data$outcomeDay,
433+
deltaExposureEnd = .data$eraEndDay - .data$outcomeDay)
434+
435+
exposureBefore <- joined |>
436+
filter(deltaExposureEnd >= -30 & deltaExposureStart <= -1) |>
437+
filter(!ignoreExposureStarts | deltaExposureStart < -30 | deltaExposureStart > -1) |>
438+
mutate(deltaExposureStart = pmax(deltaExposureStart, -30),
439+
deltaExposureEnd = pmin(deltaExposureEnd, -1)) |>
440+
group_by(caseId) |>
441+
summarise(daysExposed = sum(deltaExposureEnd - deltaExposureStart + 1)) |>
442+
select(caseId, daysExposed)
443+
444+
exposureAfter <- joined |>
445+
filter(deltaExposureEnd >= 0 & deltaExposureStart <= 29) |>
446+
filter(deltaExposureStart < 0 | deltaExposureStart > 29) |>
447+
mutate(deltaExposureStart = pmax(deltaExposureStart, 0),
448+
deltaExposureEnd = pmin(deltaExposureEnd, 29)) |>
449+
group_by(caseId) |>
450+
summarise(daysExposed = sum(deltaExposureEnd - deltaExposureStart + 1)) |>
451+
select(caseId, daysExposed)
452+
453+
# Compute days observed
454+
joined <- firstOutcomes |>
455+
inner_join(studyPopulation$cases, by = join_by("caseId")) |>
456+
mutate(deltaObservationStart = .data$startDay - .data$outcomeDay,
457+
deltaObservationEnd = .data$endDay - .data$outcomeDay)
458+
459+
observationBefore <- joined |>
460+
filter(deltaObservationEnd >= -30 & deltaObservationStart <= -1) |>
461+
filter(deltaObservationStart < -30 | deltaObservationStart > -1) |>
462+
mutate(deltaObservationStart = pmax(deltaObservationStart, -30),
463+
deltaObservationEnd = pmin(deltaObservationEnd, -1)) |>
464+
group_by(caseId) |>
465+
summarise(daysObserved = sum(deltaObservationEnd - deltaObservationStart + 1)) |>
466+
select(caseId, daysObserved)
467+
468+
observationAfter <- joined |>
469+
filter(deltaObservationEnd >= 0 & deltaObservationStart <= 29) |>
470+
filter(deltaObservationStart < 0 | deltaObservationStart > 29) |>
471+
mutate(deltaObservationStart = pmax(deltaObservationStart, 0),
472+
deltaObservationEnd = pmin(deltaObservationEnd, 29)) |>
473+
group_by(caseId) |>
474+
summarise(daysObserved = sum(deltaObservationEnd - deltaObservationStart + 1)) |>
475+
select(caseId, daysObserved)
476+
477+
poissonData <- left_join(
478+
bind_rows(
479+
observationBefore |>
480+
mutate(afterOutcome = 0),
481+
observationAfter |>
482+
mutate(afterOutcome = 1)
483+
),
484+
bind_rows(
485+
exposureBefore |>
486+
mutate(afterOutcome = 0),
487+
exposureAfter |>
488+
mutate(afterOutcome = 1)
489+
),
490+
by = join_by("caseId", "afterOutcome")
491+
) |>
492+
filter(daysObserved > 0) |>
515493
mutate(
516494
rowId = row_number(),
517-
covariateId = 1
495+
covariateId = 1,
496+
daysExposed = if_else(is.na(daysExposed), 0, daysExposed)
518497
) |>
519498
select(
520499
"rowId",
521500
stratumId = "caseId",
522501
"covariateId",
523502
covariateValue = "afterOutcome",
524503
time = "daysObserved",
525-
y = "daysExposure"
504+
y = "daysExposed"
526505
)
527506

507+
casesWithExposure <- poissonData |>
508+
filter(y > 0) |>
509+
pull(stratumId)
510+
528511
poissonData <- poissonData |>
529-
filter((covariateValue == 0 & time == 30) | (covariateValue == 1 & time == 31))
512+
filter(stratumId %in% casesWithExposure)
530513

531514
cyclopsData <- Cyclops::convertToCyclopsData(outcomes = poissonData,
532515
covariates = poissonData,
533516
addIntercept = FALSE,
534517
modelType = "cpr",
535518
quiet = TRUE)
536519
fit <- Cyclops::fitCyclopsModel(cyclopsData)
537-
fit$log_likelihood
520+
if (fit$return_flag != "SUCCESS") {
521+
return(NA)
522+
}
538523
logRr <- coef(fit)
539524
if (logRr >= bounds[1] && logRr <= bounds[2]) {
540525
llNull <- fit$log_likelihood

extras/EndOfExposureSimulations.R

+8-4
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ writeLines(sprintf("Number of simulation scenarios: %d", length(scenarios)))
5555
# Run simulations ----------------------------------------------------------------------------------
5656
folder <- "e:/SccsEdeSimulations100"
5757

58-
scenario = scenarios[[30]]
58+
scenario = scenarios[[34]]
5959
scenario$censorType
6060

6161
simulateOne <- function(seed, scenario) {
@@ -180,14 +180,16 @@ simulateOne <- function(seed, scenario) {
180180
idx2 <- which(estimates$covariateId == 1001)
181181
p <- computeExposureChangeP(sccsData, studyPop, 1)
182182
p
183+
p2 <- computeExposureChangeP(sccsData, studyPop, 1, ignoreExposureStarts = FALSE)
183184
# plotExposureCentered(studyPop, sccsData, 1)
184185
# plotOutcomeCentered(studyPop, sccsData, 1)
185186

186187
row <- tibble(logRr = estimates$logRr[idx1],
187188
ci95Lb = exp(estimates$logLb95[idx1]),
188189
ci95Ub = exp(estimates$logUb95[idx1]),
189190
diagnosticEstimate = exp(estimates$logRr[idx2]),
190-
diagnosticP = p)
191+
diagnosticP = p,
192+
diagnosticP2 = p2)
191193
return(row)
192194
}
193195

@@ -213,11 +215,13 @@ for (i in seq_along(scenarios)) {
213215
metrics <- results |>
214216
mutate(coverage = ci95Lb < scenario$trueRr & ci95Ub > scenario$trueRr,
215217
diagnosticEstimate = log(diagnosticEstimate),
216-
failDiagnostic = diagnosticP < 0.05) |>
218+
failDiagnostic = diagnosticP < 0.05,
219+
failDiagnostic2 = diagnosticP2 < 0.05) |>
217220
summarise(coverage = mean(coverage, na.rm = TRUE),
218221
bias = mean(logRr - log(scenario$trueRr), na.rm = TRUE),
219222
meanDiagnosticEstimate = exp(mean(diagnosticEstimate, na.rm = TRUE)),
220-
fractionFailingDiagnostic = mean(failDiagnostic, na.rm = TRUE))
223+
fractionFailingDiagnostic = mean(failDiagnostic, na.rm = TRUE),
224+
fractionFailingDiagnostic2 = mean(failDiagnostic2, na.rm = TRUE))
221225
metrics
222226
row <- as_tibble(scenarioKey) |>
223227
bind_cols(metrics)

man/computeExposureChangeP.Rd

+6-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)