From 70ec9a01ff3d0fa9bd76bc192d2aa905e27ecb28 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Fri, 28 Nov 2025 13:47:19 +0800 Subject: [PATCH 01/51] init commit --- .lintr | 6 + DESCRIPTION | 1 + NAMESPACE | 4 + R/assert.R | 88 +++++++ R/from_formatters.R | 31 +++ R/gkm.R | 560 +++++++++++++++++++++++++++++++++++++++++++ man/g_km.Rd | 41 ++++ man/obj_label-set.Rd | 14 ++ 8 files changed, 745 insertions(+) create mode 100644 .lintr create mode 100644 R/assert.R create mode 100644 R/from_formatters.R create mode 100644 R/gkm.R create mode 100644 man/g_km.Rd create mode 100644 man/obj_label-set.Rd diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..c514f393 --- /dev/null +++ b/.lintr @@ -0,0 +1,6 @@ +linters: linters_with_defaults( + line_length_linter = line_length_linter(120), + cyclocomp_linter = NULL, + object_usage_linter = NULL, + object_length_linter = NULL + ) diff --git a/DESCRIPTION b/DESCRIPTION index e2702d27..7f9ba55e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), + checkmate (>= 2.3.2), cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), diff --git a/NAMESPACE b/NAMESPACE index 124285cd..2484284d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,10 +7,12 @@ S3method(add_overall,tbl_shift) S3method(add_overall,tbl_survfit_quantiles) S3method(add_overall,tbl_survfit_times) export("%>%") +export("obj_label<-") export(add_blank_rows) export(add_hierarchical_count_row) export(add_overall) export(filter_hierarchical) +export(g_km) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) @@ -34,6 +36,8 @@ export(tbl_shift) export(tbl_survfit_quantiles) export(tbl_survfit_times) export(theme_gtsummary_roche) +exportMethods("obj_label<-") +exportMethods(obj_label) import(glue) import(rlang) importFrom(dplyr,"%>%") diff --git a/R/assert.R b/R/assert.R new file mode 100644 index 00000000..9f2c08e6 --- /dev/null +++ b/R/assert.R @@ -0,0 +1,88 @@ +assert_proportion_value <- function (x, include_boundaries = FALSE) +{ + checkmate::assert_number(x, lower = 0, upper = 1) + checkmate::assert_flag(include_boundaries) + if (isFALSE(include_boundaries)) { + checkmate::assert_true(x > 0) + checkmate::assert_true(x < 1) + } +} + +check_list_of_variables <- function (x) +{ + x <- Filter(Negate(is.null), x) + res <- checkmate::check_list(x, names = "named", min.len = 1, + any.missing = FALSE, types = "character") + if (isTRUE(res)) { + res <- checkmate::check_character(unlist(x), min.chars = 1) + } + res +} + +assert_list_of_variables <- function (x, .var.name = checkmate::vname(x), add = NULL) +{ + if (missing(x)) + stop(sprintf("argument \"%s\" is missing, with no default", + .var.name)) + res = check_list_of_variables(x) + checkmate::makeAssertion(x, res, .var.name, add) +} + +check_df_with_variables <- function (df, variables, na_level = NULL) +{ + checkmate::assert_data_frame(df) + assert_list_of_variables(variables) + err_flag <- all(unlist(variables) %in% colnames(df)) + checkmate::assert_flag(err_flag) + if (isFALSE(err_flag)) { + vars <- setdiff(unlist(variables), colnames(df)) + return(paste(deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", + paste(vars, collapse = ", "))) + } + if (!is.null(na_level)) { + checkmate::assert_string(na_level) + res <- unlist(lapply(as.list(df)[unlist(variables)], + function(x) any(x == na_level))) + if (any(res)) { + return(paste0(deparse(substitute(df)), " contains explicit na_level (", + na_level, ") in the following columns: ", paste0(unlist(variables)[res], + collapse = ", "))) + } + } + return(TRUE) +} + +assert_df_with_variables <- function (df, variables, na_level = NULL, .var.name = checkmate::vname(df), + add = NULL) +{ + if (missing(df)) + stop(sprintf("argument \"%s\" is missing, with no default", + .var.name)) + res = check_df_with_variables(df, variables, na_level) + checkmate::makeAssertion(df, res, .var.name, add) +} + +check_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL) +{ + checkmate::assert_int(min.levels, lower = 1) + res <- checkmate::check_factor(x, min.levels = min.levels, + null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, + n.levels = n.levels) + if (isTRUE(res)) { + res <- checkmate::check_character(levels(x), min.chars = 1) + } + return(res) +} + +assert_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), + add = NULL) +{ + if (missing(x)) + stop(sprintf("argument \"%s\" is missing, with no default", + .var.name)) + res = check_valid_factor(x, min.levels, max.levels, null.ok, + any.missing, n.levels, len) + checkmate::makeAssertion(x, res, .var.name, add) +} diff --git a/R/from_formatters.R b/R/from_formatters.R new file mode 100644 index 00000000..218adba7 --- /dev/null +++ b/R/from_formatters.R @@ -0,0 +1,31 @@ +# ## Changelog +# nocov start +# styler: off + +setGeneric("obj_label", function(obj) standardGeneric("obj_label")) + +#' The new label +#' @param value character(1). The new label +#' @export +setGeneric("obj_label<-", function(obj, value) standardGeneric("obj_label<-")) + +#' @exportMethod obj_label +setMethod("obj_label", "ANY", function(obj) attr(obj, "label")) + +#' @exportMethod obj_label<- +setMethod( + "obj_label<-", "ANY", + function(obj, value) { + attr(obj, "label") <- value + obj + } +) + +with_label <- function (x, label) +{ + obj_label(x) <- label + x +} + +# nocov end +# styler: on diff --git a/R/gkm.R b/R/gkm.R new file mode 100644 index 00000000..97d85993 --- /dev/null +++ b/R/gkm.R @@ -0,0 +1,560 @@ +control_surv_timepoint <- function (conf_level = 0.95, conf_type = c("plain", "log", "log-log")) +{ + conf_type <- match.arg(conf_type) + assert_proportion_value(conf_level) + list(conf_level = conf_level, conf_type = conf_type) +} + +control_coxph <- function (pval_method = c("log-rank", "wald", "likelihood"), + ties = c("efron", "breslow", "exact"), conf_level = 0.95) +{ + pval_method <- match.arg(pval_method) + ties <- match.arg(ties) + assert_proportion_value(conf_level) + list(pval_method = pval_method, ties = ties, conf_level = conf_level) +} + +control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { + assert_proportion_value(x) + assert_proportion_value(y) + assert_proportion_value(w) + assert_proportion_value(h) + + list(x = x, y = y, w = w, h = h, fill = fill) +} + +control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { + checkmate::assert_logical(ref_lbls, any.missing = FALSE) + + res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) + res +} + + +## ---------------------------------------------------------------------------- +## 3. Helper Functions (Formatting, Data Preparation, Plotting Utilities) +## ---------------------------------------------------------------------------- + +f_conf_level <- function (conf_level) +{ + assert_proportion_value(conf_level) + paste0(conf_level * 100, "% CI") +} + +df2gg <- function (df, colwidths = NULL, font_size = 10, col_labels = TRUE, + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) +{ + df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) + "NA" + else as.character(x))) + if (col_labels) { + df <- as.matrix(df) + df <- rbind(colnames(df), df) + } + if (is.null(colwidths)) { + colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) + } + tot_width <- sum(colwidths) + res <- ggplot2::ggplot(data = df) + theme_void() + scale_x_continuous(limits = c(0, + tot_width)) + scale_y_continuous(limits = c(1, nrow(df))) + if (!is.null(bg_fill)) + res <- res + theme(plot.background = element_rect(fill = bg_fill)) + if (hline) { + res <- res + annotate("segment", x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5) + } + for (i in seq_len(ncol(df))) { + line_pos <- c(if (i == 1) 0 else sum(colwidths[1:(i - + 1)]), sum(colwidths[1:i])) + res <- res + annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size/.pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } + else { + rep("plain", nrow(df)) + }) + } + res +} + +h_xticks <- function(data, xticks = NULL, max_time = NULL) { + if (is.null(xticks)) { + if (is.null(max_time)) { + labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) + } else { + labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) + } + } else if (checkmate::test_number(xticks)) { + if (is.null(max_time)) { + seq(0, max(data$time), xticks) + } else { + seq(0, max(data$time, max_time), xticks) + } + } else if (is.numeric(xticks)) { + xticks + } else { + stop( + paste( + "xticks should be either `NULL`", + "or a single number (interval between x ticks)", + "or a numeric vector (position of ticks on the x axis)" + ) + ) + } +} + +h_tbl_median_surv <- function(fit_km, armval = "All") { + y <- if (is.null(fit_km$strata)) { + as.data.frame(t(summary(fit_km)$table), row.names = armval) + } else { + tbl <- summary(fit_km)$table + rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") + rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] + as.data.frame(tbl) + } + conf.int <- summary(fit_km)$conf.int # nolint + y$records <- round(y$records) + y$median <- signif(y$median, 4) + y$`CI` <- paste0( + "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" + ) + stats::setNames( + y[c("records", "median", "CI")], + c("N", "Median", f_conf_level(conf.int)) + ) +} + +h_tbl_coxph_pairwise <- function(df, + variables, + ref_group_coxph = NULL, + control_coxph_pw = control_coxph(), + annot_coxph_ref_lbls = FALSE) { + + assert_df_with_variables(df, variables) + checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) + checkmate::assert_flag(annot_coxph_ref_lbls) + + arm <- variables$arm + df[[arm]] <- factor(df[[arm]]) + + ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] + comp_group <- setdiff(levels(df[[arm]]), ref_group) + + results <- Map(function(comp) { + res <- s_coxph_pairwise( + df = df[df[[arm]] == comp, , drop = FALSE], + .ref_group = df[df[[arm]] == ref_group, , drop = FALSE], + .in_ref_col = FALSE, + .var = variables$tte, + is_event = variables$is_event, + strata = variables$strata, + control = control_coxph_pw + ) + res_df <- data.frame( + hr = format(round(res$hr, 2), nsmall = 2), + hr_ci = paste0( + "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", + format(round(res$hr_ci[2], 2), nsmall = 2), ")" + ), + pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), + stringsAsFactors = FALSE + ) + colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) # nolint + row.names(res_df) <- comp + res_df + }, comp_group) + if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) + + do.call(rbind, results) +} + +h_data_plot <- function(fit_km, + armval = "All", + max_time = NULL) { + y <- broom::tidy(fit_km) + + if (!is.null(fit_km$strata)) { + fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") + strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) + strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals") + y$strata <- factor( + vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), + levels = strata_levels + ) + } else { + y$strata <- armval + } + + y_by_strata <- split(y, y$strata) + y_by_strata_extended <- lapply( + y_by_strata, + FUN = function(tbl) { + first_row <- tbl[1L, ] + first_row$time <- 0 + first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")]) + first_row$n.event <- first_row$n.censor <- 0 + first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 + first_row$std.error <- 0 + rbind( + first_row, + tbl + ) + } + ) + y <- do.call(rbind, y_by_strata_extended) + + y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) + if (!is.null(max_time)) { + y <- y[y$time <= max(max_time), ] + } + y +} + + +## ---------------------------------------------------------------------------- +## 4. Core Statistical Function +## ---------------------------------------------------------------------------- + +s_coxph_pairwise <- + function (df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, + control = control_coxph(), ...) + { + checkmate::assert_string(.var) + checkmate::assert_numeric(df[[.var]]) + checkmate::assert_logical(df[[is_event]]) + assert_df_with_variables(df, list(tte = .var, is_event = is_event)) + pval_method <- control$pval_method + ties <- control$ties + conf_level <- control$conf_level + if (.in_ref_col) { + return(list(pvalue = with_label(numeric(), + paste0("p-value (", pval_method, ")")), hr = with_label(numeric(), + "Hazard Ratio"), hr_ci = with_label(numeric(), + f_conf_level(conf_level)), hr_ci_3d = with_label(numeric(), + paste0("Hazard Ratio (", f_conf_level(conf_level), + ")")), n_tot = with_label(numeric(), + "Total n"), n_tot_events = with_label(numeric(), + "Total events"))) + } + data <- rbind(.ref_group, df) + group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), + levels = c("ref", "x")) + df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], + arm = group) + if (is.null(strata)) { + formula_cox <- survival::Surv(tte, is_event) ~ arm + } + else { + formula_cox <- stats::as.formula(paste0("survival::Surv(tte, is_event) ~ arm + strata(", + paste(strata, collapse = ","), ")")) + df_cox <- cbind(df_cox, data[strata]) + } + cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, + ties = ties) + sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) + orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) + log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + 1) + pval <- switch(pval_method, wald = sum_cox$waldtest["pvalue"], + `log-rank` = log_rank_pvalue, likelihood = sum_cox$logtest["pvalue"]) + list(pvalue = with_label(unname(pval), paste0("p-value (", + pval_method, ")")), hr = with_label(sum_cox$conf.int[1, + 1], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[1, + 3:4]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[1, + 1], unname(sum_cox$conf.int[1, 3:4])), paste0("Hazard Ratio (", + f_conf_level(conf_level), ")")), n_tot = with_label(sum_cox$n, + "Total n"), n_tot_events = with_label(sum_cox$nevent, + "Total events")) + } + + +h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { + tte <- variables$tte + is_event <- variables$is_event + arm <- variables$arm + + assert_valid_factor(df[[arm]]) + assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) + + formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) + fit_km <- survival::survfit( + formula = formula, + data = df, + conf.int = control_surv$conf_level, + conf.type = control_surv$conf_type + ) + return(fit_km) +} + +#' g_km plot +#' @export +g_km <- function(fit_km, + variables, + coxph_tbl = NULL, # New argument for pre-calculated Cox-PH table + control_surv = control_surv_timepoint(), + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + annot_at_risk = TRUE, + annot_at_risk_title = TRUE, + annot_surv_med = TRUE, + control_annot_surv_med = control_surv_med_annot(), + control_annot_coxph = control_coxph_annot(), + legend_pos = NULL, + rel_height_plot = 0.75, + ggtheme = NULL, + as_list = FALSE) { + + # --- Data Extraction and Assertions --- + checkmate::assert_class(fit_km, "survfit") + checkmate::assert_list(variables) + checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables)) + + # 1. Extract arm values (strata names) from the fitted object + # h_data_plot is used here only to consistently get the unique strata levels + armval <- if (is.null(fit_km$strata)) "All" else levels(h_data_plot(fit_km, max_time = 1)$strata) + checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) + + # Check if a Cox-PH table was provided (replaces annot_coxph flag) + if (!is.null(coxph_tbl)) { + checkmate::assert_data_frame(coxph_tbl) + } + + # --- Data Processing --- + yval <- match.arg(yval) + data <- h_data_plot(fit_km, armval = armval, max_time = max_time) + xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) + + # change estimates of survival to estimates of failure (1 - survival) + if (yval == "Failure") { + data[c("estimate", "conf.low", "conf.high", "censor")] <- list( + 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor + ) + } + + # derive y-axis limits + if (is.null(ylim)) { + if (!is.null(max_time)) { + y_lwr <- min(data[data$time < max_time, ][["estimate"]]) + y_upr <- max(data[data$time < max_time, ][["estimate"]]) + } else { + y_lwr <- min(data[["estimate"]]) + y_upr <- max(data[["estimate"]]) + } + ylim <- c(y_lwr, y_upr) + } + + # --- ggplot Initialization and Aesthetics (Unchanged) --- + gg_plt <- ggplot2::ggplot( + data = data, + mapping = aes( + x = .data[["time"]], y = .data[["estimate"]], ymin = .data[["conf.low"]], + ymax = .data[["conf.high"]], color = .data[["strata"]], fill = .data[["strata"]] + ) + ) + + theme_bw(base_size = font_size) + + scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + + labs(title = title, x = xlab, y = ylab, caption = footnotes) + + theme( + axis.text = element_text(size = font_size), axis.title = element_text(size = font_size), + legend.title = element_blank(), legend.text = element_text(size = font_size), + legend.box.background = element_rect(fill = "white", linewidth = 0.5), + legend.background = element_blank(), legend.position = "inside", + legend.spacing.y = unit(-0.02, "npc"), panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + # derive x-axis limits + if (!is.null(max_time) && !is.null(xticks)) { + gg_plt <- gg_plt + scale_x_continuous( + breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) + ) + } else if (!is.null(xticks)) { + if (max(data$time) <= max(xticks)) { + gg_plt <- gg_plt + scale_x_continuous( + breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) + ) + } else { + gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) + } + } else if (!is.null(max_time)) { + gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) + } + + # set legend position (unchanged logic) + if (!is.null(legend_pos)) { + gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) + } else { + max_time2 <- sort( + data$time, + partial = nrow(data) - length(armval) - 1 + )[nrow(data) - length(armval) - 1] + + y_rng <- ylim[2] - ylim[1] + + if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && + all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint + gg_plt <- gg_plt + + theme( + legend.position.inside = c(1, 0.5), + legend.justification = c(1.1, 0.6) + ) + } else { + gg_plt <- gg_plt + + theme( + legend.position.inside = c(1, 0), + legend.justification = c(1.1, -0.4) + ) + } + } + + # add lines, censor marks, ci ribbon, and colors (unchanged) + gg_plt <- if (is.null(lty)) { + gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) + } else if (length(lty) == 1) { + gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) + } else { + gg_plt + + geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + + scale_linetype_manual(values = lty) + } + + if (censor_show) { + gg_plt <- gg_plt + geom_point( + data = data[data$n.censor != 0, ], + aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), + size = size, + na.rm = TRUE + ) + + scale_shape_manual(name = NULL, values = pch) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + } + + if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) + + if (!is.null(col)) { + gg_plt <- gg_plt + + scale_color_manual(values = col) + + scale_fill_manual(values = col) + } + if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme + + # --- Annotation Tables --- + + # 2. Median survival time annotation table + if (annot_surv_med) { + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) + bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + + theme( + axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = margin(0, 2, 0, 5) + ) + + coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + scale_x_continuous(expand = c(0.025, 0)) + + scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], + width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], + vjust = 0.5, hjust = 0.5 + ) + } + + # 3. Cox-PH annotation table + if (!is.null(coxph_tbl)) { + # coxph_tbl is pre-computed outside g_km, just plot it + bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] + + gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + + theme( + axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = margin(0, 2, 0, 5) + ) + + coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) + gg_coxph <- suppressMessages( + gg_coxph + + scale_x_continuous(expand = c(0.025, 0)) + + scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], + width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], + vjust = 0.5, hjust = 0.5 + ) + } + + # add at risk annotation table (unchanged logic) + if (annot_at_risk) { + annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) + annot_tbl <- if (is.null(fit_km$strata)) { + data.frame( + n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = armval + ) + } else { + strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") + levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] + data.frame( + n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = annot_tbl$strata + ) + } + + at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) + at_risk_tbl[is.na(at_risk_tbl)] <- 0 + rownames(at_risk_tbl) <- levels(annot_tbl$strata) + + gg_at_risk <- df2gg( + at_risk_tbl, font_size = font_size, col_labels = FALSE, hline = FALSE, + colwidths = rep(1, ncol(at_risk_tbl)) + ) + + labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + + theme_bw(base_size = font_size) + + theme( + plot.title = element_text(size = font_size, vjust = 3, face = "bold"), + panel.border = element_blank(), panel.grid = element_blank(), + axis.title.y = element_blank(), axis.ticks.y = element_blank(), + axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), + axis.text.x = element_text(size = font_size), axis.line.x = element_line() + ) + + coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) + gg_at_risk <- suppressMessages( + gg_at_risk + + scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + + scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) + ) + + if (!as_list) { + gg_plt <- cowplot::plot_grid( + gg_plt, gg_at_risk, align = "v", axis = "tblr", ncol = 1, + rel_heights = c(rel_height_plot, 1 - rel_height_plot) + ) + } + } + + if (as_list) { + list(plot = gg_plt, table = gg_at_risk) + } else { + gg_plt + } +} diff --git a/man/g_km.Rd b/man/g_km.Rd new file mode 100644 index 00000000..c1dab50c --- /dev/null +++ b/man/g_km.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{g_km} +\alias{g_km} +\title{g_km plot} +\usage{ +g_km( + fit_km, + variables, + coxph_tbl = NULL, + control_surv = control_surv_timepoint(), + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + annot_at_risk = TRUE, + annot_at_risk_title = TRUE, + annot_surv_med = TRUE, + control_annot_surv_med = control_surv_med_annot(), + control_annot_coxph = control_coxph_annot(), + legend_pos = NULL, + rel_height_plot = 0.75, + ggtheme = NULL, + as_list = FALSE +) +} +\description{ +g_km plot +} diff --git a/man/obj_label-set.Rd b/man/obj_label-set.Rd new file mode 100644 index 00000000..f566f8c6 --- /dev/null +++ b/man/obj_label-set.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/from_formatters.R +\name{obj_label<-} +\alias{obj_label<-} +\title{The new label} +\usage{ +obj_label(obj) <- value +} +\arguments{ +\item{value}{character(1). The new label} +} +\description{ +The new label +} From 6d8d156fa3fea18f32842066e10b4b57b55b3684 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Fri, 28 Nov 2025 23:39:09 +0800 Subject: [PATCH 02/51] update --- NAMESPACE | 29 ++++ R/assert.R | 101 ++++++++------ R/gkm.R | 262 +++++++++++++++++++++++++++--------- man/g_km.Rd | 73 +++++++++- man/h_km_fit.Rd | 29 ++++ man/h_tbl_coxph_pairwise.Rd | 37 +++++ 6 files changed, 421 insertions(+), 110 deletions(-) create mode 100644 man/h_km_fit.Rd create mode 100644 man/h_tbl_coxph_pairwise.Rd diff --git a/NAMESPACE b/NAMESPACE index 2484284d..ad4a231e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,8 @@ export(add_hierarchical_count_row) export(add_overall) export(filter_hierarchical) export(g_km) +export(h_km_fit) +export(h_tbl_coxph_pairwise) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) @@ -40,6 +42,10 @@ exportMethods("obj_label<-") exportMethods(obj_label) import(glue) import(rlang) +importFrom(broom,tidy) +importFrom(cowplot,draw_plot) +importFrom(cowplot,ggdraw) +importFrom(cowplot,plot_grid) importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,all_of) @@ -52,6 +58,29 @@ importFrom(dplyr,matches) importFrom(dplyr,num_range) importFrom(dplyr,starts_with) importFrom(dplyr,where) +importFrom(ggplot2,aes) +importFrom(ggplot2,coord_cartesian) +importFrom(ggplot2,element_blank) +importFrom(ggplot2,element_line) +importFrom(ggplot2,element_rect) +importFrom(ggplot2,element_text) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_ribbon) +importFrom(ggplot2,geom_step) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,guide_legend) +importFrom(ggplot2,guides) +importFrom(ggplot2,labs) +importFrom(ggplot2,scale_color_manual) +importFrom(ggplot2,scale_fill_manual) +importFrom(ggplot2,scale_linetype_manual) +importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,theme) +importFrom(ggplot2,theme_bw) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(stats,as.formula) +importFrom(survival,survfit) +importFrom(tidyr,pivot_wider) diff --git a/R/assert.R b/R/assert.R index 9f2c08e6..47c8b433 100644 --- a/R/assert.R +++ b/R/assert.R @@ -1,5 +1,4 @@ -assert_proportion_value <- function (x, include_boundaries = FALSE) -{ +assert_proportion_value <- function(x, include_boundaries = FALSE) { checkmate::assert_number(x, lower = 0, upper = 1) checkmate::assert_flag(include_boundaries) if (isFALSE(include_boundaries)) { @@ -8,81 +7,97 @@ assert_proportion_value <- function (x, include_boundaries = FALSE) } } -check_list_of_variables <- function (x) -{ +check_list_of_variables <- function(x) { x <- Filter(Negate(is.null), x) - res <- checkmate::check_list(x, names = "named", min.len = 1, - any.missing = FALSE, types = "character") + res <- checkmate::check_list(x, + names = "named", min.len = 1, + any.missing = FALSE, types = "character" + ) if (isTRUE(res)) { res <- checkmate::check_character(unlist(x), min.chars = 1) } res } -assert_list_of_variables <- function (x, .var.name = checkmate::vname(x), add = NULL) -{ - if (missing(x)) - stop(sprintf("argument \"%s\" is missing, with no default", - .var.name)) - res = check_list_of_variables(x) +assert_list_of_variables <- function(x, .var.name = checkmate::vname(x), add = NULL) { + if (missing(x)) { + stop(sprintf( + "argument \"%s\" is missing, with no default", + .var.name + )) + } + res <- check_list_of_variables(x) checkmate::makeAssertion(x, res, .var.name, add) } -check_df_with_variables <- function (df, variables, na_level = NULL) -{ +check_df_with_variables <- function(df, variables, na_level = NULL) { checkmate::assert_data_frame(df) assert_list_of_variables(variables) err_flag <- all(unlist(variables) %in% colnames(df)) checkmate::assert_flag(err_flag) if (isFALSE(err_flag)) { vars <- setdiff(unlist(variables), colnames(df)) - return(paste(deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", - paste(vars, collapse = ", "))) + return(paste( + deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", + paste(vars, collapse = ", ") + )) } if (!is.null(na_level)) { checkmate::assert_string(na_level) - res <- unlist(lapply(as.list(df)[unlist(variables)], - function(x) any(x == na_level))) + res <- unlist(lapply( + as.list(df)[unlist(variables)], + function(x) any(x == na_level) + )) if (any(res)) { - return(paste0(deparse(substitute(df)), " contains explicit na_level (", - na_level, ") in the following columns: ", paste0(unlist(variables)[res], - collapse = ", "))) + return(paste0( + deparse(substitute(df)), " contains explicit na_level (", + na_level, ") in the following columns: ", paste0(unlist(variables)[res], + collapse = ", " + ) + )) } } return(TRUE) } -assert_df_with_variables <- function (df, variables, na_level = NULL, .var.name = checkmate::vname(df), - add = NULL) -{ - if (missing(df)) - stop(sprintf("argument \"%s\" is missing, with no default", - .var.name)) - res = check_df_with_variables(df, variables, na_level) +assert_df_with_variables <- function(df, variables, na_level = NULL, .var.name = checkmate::vname(df), + add = NULL) { + if (missing(df)) { + stop(sprintf( + "argument \"%s\" is missing, with no default", + .var.name + )) + } + res <- check_df_with_variables(df, variables, na_level) checkmate::makeAssertion(df, res, .var.name, add) } -check_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL) -{ +check_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL) { checkmate::assert_int(min.levels, lower = 1) - res <- checkmate::check_factor(x, min.levels = min.levels, - null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, - n.levels = n.levels) + res <- checkmate::check_factor(x, + min.levels = min.levels, + null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, + n.levels = n.levels + ) if (isTRUE(res)) { res <- checkmate::check_character(levels(x), min.chars = 1) } return(res) } -assert_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), - add = NULL) -{ - if (missing(x)) - stop(sprintf("argument \"%s\" is missing, with no default", - .var.name)) - res = check_valid_factor(x, min.levels, max.levels, null.ok, - any.missing, n.levels, len) +assert_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), + add = NULL) { + if (missing(x)) { + stop(sprintf( + "argument \"%s\" is missing, with no default", + .var.name + )) + } + res <- check_valid_factor( + x, min.levels, max.levels, null.ok, + any.missing, n.levels, len + ) checkmate::makeAssertion(x, res, .var.name, add) } diff --git a/R/gkm.R b/R/gkm.R index 97d85993..e79f3a9d 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,13 +1,11 @@ -control_surv_timepoint <- function (conf_level = 0.95, conf_type = c("plain", "log", "log-log")) -{ +control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { conf_type <- match.arg(conf_type) assert_proportion_value(conf_level) list(conf_level = conf_level, conf_type = conf_type) } -control_coxph <- function (pval_method = c("log-rank", "wald", "likelihood"), - ties = c("efron", "breslow", "exact"), conf_level = 0.95) -{ +control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), + ties = c("efron", "breslow", "exact"), conf_level = 0.95) { pval_method <- match.arg(pval_method) ties <- match.arg(ties) assert_proportion_value(conf_level) @@ -35,18 +33,20 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T ## 3. Helper Functions (Formatting, Data Preparation, Plotting Utilities) ## ---------------------------------------------------------------------------- -f_conf_level <- function (conf_level) -{ +f_conf_level <- function(conf_level) { assert_proportion_value(conf_level) paste0(conf_level * 100, "% CI") } -df2gg <- function (df, colwidths = NULL, font_size = 10, col_labels = TRUE, - col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) -{ - df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) - "NA" - else as.character(x))) +df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { + df <- as.data.frame(apply(df, 1:2, function(x) { + if (is.na(x)) { + "NA" + } else { + as.character(x) + } + })) if (col_labels) { df <- as.matrix(df) df <- rbind(colnames(df), df) @@ -55,25 +55,38 @@ df2gg <- function (df, colwidths = NULL, font_size = 10, col_labels = TRUE, colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) } tot_width <- sum(colwidths) - res <- ggplot2::ggplot(data = df) + theme_void() + scale_x_continuous(limits = c(0, - tot_width)) + scale_y_continuous(limits = c(1, nrow(df))) - if (!is.null(bg_fill)) + res <- ggplot2::ggplot(data = df) + + theme_void() + + scale_x_continuous(limits = c( + 0, + tot_width + )) + + scale_y_continuous(limits = c(1, nrow(df))) + if (!is.null(bg_fill)) { res <- res + theme(plot.background = element_rect(fill = bg_fill)) + } if (hline) { - res <- res + annotate("segment", x = 0 + 0.2 * colwidths[2], - xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - - 0.5, yend = nrow(df) - 0.5) + res <- res + annotate("segment", + x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5 + ) } for (i in seq_len(ncol(df))) { - line_pos <- c(if (i == 1) 0 else sum(colwidths[1:(i - - 1)]), sum(colwidths[1:i])) - res <- res + annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), - label = df[, i], size = font_size/.pt, fontface = if (col_labels) { - c(col_lab_fontface, rep("plain", nrow(df) - 1)) - } - else { - rep("plain", nrow(df)) - }) + line_pos <- c(if (i == 1) { + 0 + } else { + sum(colwidths[1:(i - + 1)]) + }, sum(colwidths[1:i])) + res <- res + annotate("text", + x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size / .pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } else { + rep("plain", nrow(df)) + } + ) } res } @@ -125,12 +138,29 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { ) } +#' @title Pairwise Cox Proportional Hazards Model Summary Table +#' +#' @description This function computes and formats the results of a pairwise Cox Proportional +#' Hazards (Cox-PH) regression analysis between different treatment arms. +#' +#' @param df A data frame containing the survival data. +#' @param variables A named list specifying the column names for time-to-event (\code{tte}), +#' treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}. +#' @param ref_group_coxph An optional string specifying the reference group for the Cox-PH model. +#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. +#' @param control_coxph_pw A list of control parameters for the Cox-PH model, typically +#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. +#' @param annot_coxph_ref_lbls A logical flag indicating whether to append "vs. ref group" +#' to the row names in the resulting table. +#' +#' @return A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), +#' its confidence interval, and the p-value. +#' @export h_tbl_coxph_pairwise <- function(df, variables, ref_group_coxph = NULL, control_coxph_pw = control_coxph(), annot_coxph_ref_lbls = FALSE) { - assert_df_with_variables(df, variables) checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) checkmate::assert_flag(annot_coxph_ref_lbls) @@ -217,9 +247,8 @@ h_data_plot <- function(fit_km, ## ---------------------------------------------------------------------------- s_coxph_pairwise <- - function (df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, - control = control_coxph(), ...) - { + function(df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, + control = control_coxph(), ...) { checkmate::assert_string(.var) checkmate::assert_numeric(df[[.var]]) checkmate::assert_logical(df[[is_event]]) @@ -228,47 +257,102 @@ s_coxph_pairwise <- ties <- control$ties conf_level <- control$conf_level if (.in_ref_col) { - return(list(pvalue = with_label(numeric(), - paste0("p-value (", pval_method, ")")), hr = with_label(numeric(), - "Hazard Ratio"), hr_ci = with_label(numeric(), - f_conf_level(conf_level)), hr_ci_3d = with_label(numeric(), - paste0("Hazard Ratio (", f_conf_level(conf_level), - ")")), n_tot = with_label(numeric(), - "Total n"), n_tot_events = with_label(numeric(), - "Total events"))) + return(list(pvalue = with_label( + numeric(), + paste0("p-value (", pval_method, ")") + ), hr = with_label( + numeric(), + "Hazard Ratio" + ), hr_ci = with_label( + numeric(), + f_conf_level(conf_level) + ), hr_ci_3d = with_label( + numeric(), + paste0( + "Hazard Ratio (", f_conf_level(conf_level), + ")" + ) + ), n_tot = with_label( + numeric(), + "Total n" + ), n_tot_events = with_label( + numeric(), + "Total events" + ))) } data <- rbind(.ref_group, df) group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), - levels = c("ref", "x")) - df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], - arm = group) + levels = c("ref", "x") + ) + df_cox <- data.frame( + tte = data[[.var]], is_event = data[[is_event]], + arm = group + ) if (is.null(strata)) { formula_cox <- survival::Surv(tte, is_event) ~ arm - } - else { - formula_cox <- stats::as.formula(paste0("survival::Surv(tte, is_event) ~ arm + strata(", - paste(strata, collapse = ","), ")")) + } else { + formula_cox <- stats::as.formula(paste0( + "survival::Surv(tte, is_event) ~ arm + strata(", + paste(strata, collapse = ","), ")" + )) df_cox <- cbind(df_cox, data[strata]) } - cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, - ties = ties) + cox_fit <- survival::coxph( + formula = formula_cox, data = df_cox, + ties = ties + ) sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) - pval <- switch(pval_method, wald = sum_cox$waldtest["pvalue"], - `log-rank` = log_rank_pvalue, likelihood = sum_cox$logtest["pvalue"]) - list(pvalue = with_label(unname(pval), paste0("p-value (", - pval_method, ")")), hr = with_label(sum_cox$conf.int[1, - 1], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[1, - 3:4]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[1, - 1], unname(sum_cox$conf.int[1, 3:4])), paste0("Hazard Ratio (", - f_conf_level(conf_level), ")")), n_tot = with_label(sum_cox$n, - "Total n"), n_tot_events = with_label(sum_cox$nevent, - "Total events")) + 1) + pval <- switch(pval_method, + wald = sum_cox$waldtest["pvalue"], + `log-rank` = log_rank_pvalue, + likelihood = sum_cox$logtest["pvalue"] + ) + list(pvalue = with_label(unname(pval), paste0( + "p-value (", + pval_method, ")" + )), hr = with_label(sum_cox$conf.int[ + 1, + 1 + ], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[ + 1, + 3:4 + ]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[ + 1, + 1 + ], unname(sum_cox$conf.int[1, 3:4])), paste0( + "Hazard Ratio (", + f_conf_level(conf_level), ")" + )), n_tot = with_label( + sum_cox$n, + "Total n" + ), n_tot_events = with_label( + sum_cox$nevent, + "Total events" + )) } +#' @title Kaplan-Meier Survival Curve Fitting +#' +#' @description This helper function fits a Kaplan-Meier survival curve model +#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. +#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. +#' +#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), +#' and treatment arm (\code{arm}) variables. +#' @param variables A named list specifying the column names for time-to-event (\code{tte}), +#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. +#' For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}. +#' @param control_surv A list of control parameters for the \code{survival::survfit} function, +#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level +#' and confidence interval type. +#' +#' @return An object of class \code{survfit} from the \code{survival} package, containing +#' the fitted Kaplan-Meier curves. +#' @export h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { tte <- variables$tte is_event <- variables$is_event @@ -287,7 +371,54 @@ h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { return(fit_km) } -#' g_km plot +#' @title Generate a Kaplan-Meier Plot with Annotations +#' +#' @description This function creates a comprehensive ggplot2 object for a Kaplan-Meier +#' survival curve, optionally including annotations for median survival and Cox-PH results, +#' and a 'Numbers at Risk' table below the main plot. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, typically +#' generated by \code{\link{h_km_fit}}. +#' @param variables A named list specifying the survival and grouping variables (needed +#' for accessing the column names, even if the fit is provided). +#' @param coxph_tbl An optional data frame containing pre-calculated Cox-PH results, +#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added. +#' @param control_surv A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}. +#' @param col A character vector of colors for the survival curves. Length should match number of arms. +#' @param lty A vector of line types for the survival curves, or \code{NULL} for default. +#' @param lwd Numeric value specifying line width for the survival curves. +#' @param censor_show Logical, whether to display censoring marks on the plot. +#' @param pch Plotting character for censoring marks. +#' @param size Size of the censoring marks. +#' @param max_time Numeric, the maximum time point to display on the x-axis. +#' @param xticks Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto. +#' @param xlab Character string for the x-axis label. +#' @param yval Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability. +#' @param ylab Character string for the y-axis label. +#' @param ylim Numeric vector of length 2 for y-axis limits. +#' @param title Character string for the plot title. +#' @param footnotes Character string for plot footnotes/caption. +#' @param font_size Numeric, base font size for the plot theme. +#' @param ci_ribbon Logical, whether to display confidence intervals as a ribbon (area). +#' @param annot_at_risk Logical, whether to include the 'Numbers at Risk' table below the plot. +#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:" in the table. +#' @param annot_surv_med Logical, whether to include the median survival time annotation table. +#' @param control_annot_surv_med A list of control parameters for the median survival annotation box, +#' typically generated by \code{\link{control_surv_med_annot}}. +#' @param control_annot_coxph A list of control parameters for the Cox-PH annotation box, +#' typically generated by \code{\link{control_coxph_annot}}. +#' @param legend_pos Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement. +#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). +#' @param ggtheme An optional \code{ggplot2} theme to apply. +#' @param as_list Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object. +#' +#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +#' @importFrom ggplot2 ggplot aes theme_bw scale_y_continuous labs theme element_text element_blank element_rect element_line geom_step geom_point scale_shape_manual guides guide_legend geom_ribbon scale_color_manual scale_fill_manual scale_linetype_manual coord_cartesian +#' @importFrom cowplot ggdraw draw_plot plot_grid +#' @importFrom tidyr pivot_wider +#' @importFrom survival survfit +#' @importFrom broom tidy +#' @importFrom stats as.formula #' @export g_km <- function(fit_km, variables, @@ -318,7 +449,6 @@ g_km <- function(fit_km, rel_height_plot = 0.75, ggtheme = NULL, as_list = FALSE) { - # --- Data Extraction and Assertions --- checkmate::assert_class(fit_km, "survfit") checkmate::assert_list(variables) @@ -407,7 +537,7 @@ g_km <- function(fit_km, y_rng <- ylim[2] - ylim[1] if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && - all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint + all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint gg_plt <- gg_plt + theme( legend.position.inside = c(1, 0.5), @@ -525,7 +655,8 @@ g_km <- function(fit_km, rownames(at_risk_tbl) <- levels(annot_tbl$strata) gg_at_risk <- df2gg( - at_risk_tbl, font_size = font_size, col_labels = FALSE, hline = FALSE, + at_risk_tbl, + font_size = font_size, col_labels = FALSE, hline = FALSE, colwidths = rep(1, ncol(at_risk_tbl)) ) + labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + @@ -546,7 +677,8 @@ g_km <- function(fit_km, if (!as_list) { gg_plt <- cowplot::plot_grid( - gg_plt, gg_at_risk, align = "v", axis = "tblr", ncol = 1, + gg_plt, gg_at_risk, + align = "v", axis = "tblr", ncol = 1, rel_heights = c(rel_height_plot, 1 - rel_height_plot) ) } diff --git a/man/g_km.Rd b/man/g_km.Rd index c1dab50c..7fd3be45 100644 --- a/man/g_km.Rd +++ b/man/g_km.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/gkm.R \name{g_km} \alias{g_km} -\title{g_km plot} +\title{Generate a Kaplan-Meier Plot with Annotations} \usage{ g_km( fit_km, @@ -36,6 +36,75 @@ g_km( as_list = FALSE ) } +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, typically +generated by \code{\link{h_km_fit}}.} + +\item{variables}{A named list specifying the survival and grouping variables (needed +for accessing the column names, even if the fit is provided).} + +\item{coxph_tbl}{An optional data frame containing pre-calculated Cox-PH results, +typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added.} + +\item{control_surv}{A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}.} + +\item{col}{A character vector of colors for the survival curves. Length should match number of arms.} + +\item{lty}{A vector of line types for the survival curves, or \code{NULL} for default.} + +\item{lwd}{Numeric value specifying line width for the survival curves.} + +\item{censor_show}{Logical, whether to display censoring marks on the plot.} + +\item{pch}{Plotting character for censoring marks.} + +\item{size}{Size of the censoring marks.} + +\item{max_time}{Numeric, the maximum time point to display on the x-axis.} + +\item{xticks}{Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto.} + +\item{xlab}{Character string for the x-axis label.} + +\item{yval}{Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability.} + +\item{ylab}{Character string for the y-axis label.} + +\item{ylim}{Numeric vector of length 2 for y-axis limits.} + +\item{title}{Character string for the plot title.} + +\item{footnotes}{Character string for plot footnotes/caption.} + +\item{font_size}{Numeric, base font size for the plot theme.} + +\item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} + +\item{annot_at_risk}{Logical, whether to include the 'Numbers at Risk' table below the plot.} + +\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:" in the table.} + +\item{annot_surv_med}{Logical, whether to include the median survival time annotation table.} + +\item{control_annot_surv_med}{A list of control parameters for the median survival annotation box, +typically generated by \code{\link{control_surv_med_annot}}.} + +\item{control_annot_coxph}{A list of control parameters for the Cox-PH annotation box, +typically generated by \code{\link{control_coxph_annot}}.} + +\item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} + +\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} + +\item{ggtheme}{An optional \code{ggplot2} theme to apply.} + +\item{as_list}{Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object.} +} +\value{ +A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +} \description{ -g_km plot +This function creates a comprehensive ggplot2 object for a Kaplan-Meier +survival curve, optionally including annotations for median survival and Cox-PH results, +and a 'Numbers at Risk' table below the main plot. } diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd new file mode 100644 index 00000000..bbb66746 --- /dev/null +++ b/man/h_km_fit.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_km_fit} +\alias{h_km_fit} +\title{Kaplan-Meier Survival Curve Fitting} +\usage{ +h_km_fit(df, variables, control_surv = control_surv_timepoint()) +} +\arguments{ +\item{df}{A data frame containing time-to-event (tte), event status (\code{is_event}), +and treatment arm (\code{arm}) variables.} + +\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), +event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. +For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}.} + +\item{control_surv}{A list of control parameters for the \code{survival::survfit} function, +typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level +and confidence interval type.} +} +\value{ +An object of class \code{survfit} from the \code{survival} package, containing +the fitted Kaplan-Meier curves. +} +\description{ +This helper function fits a Kaplan-Meier survival curve model +using the formula \code{survival::Surv(tte, is_event) ~ arm}. +It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. +} diff --git a/man/h_tbl_coxph_pairwise.Rd b/man/h_tbl_coxph_pairwise.Rd new file mode 100644 index 00000000..ee4add72 --- /dev/null +++ b/man/h_tbl_coxph_pairwise.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_tbl_coxph_pairwise} +\alias{h_tbl_coxph_pairwise} +\title{Pairwise Cox Proportional Hazards Model Summary Table} +\usage{ +h_tbl_coxph_pairwise( + df, + variables, + ref_group_coxph = NULL, + control_coxph_pw = control_coxph(), + annot_coxph_ref_lbls = FALSE +) +} +\arguments{ +\item{df}{A data frame containing the survival data.} + +\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), +treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}.} + +\item{ref_group_coxph}{An optional string specifying the reference group for the Cox-PH model. +If \code{NULL}, the first factor level of the arm variable is used as the reference group.} + +\item{control_coxph_pw}{A list of control parameters for the Cox-PH model, typically +generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level.} + +\item{annot_coxph_ref_lbls}{A logical flag indicating whether to append "vs. ref group" +to the row names in the resulting table.} +} +\value{ +A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), +its confidence interval, and the p-value. +} +\description{ +This function computes and formats the results of a pairwise Cox Proportional +Hazards (Cox-PH) regression analysis between different treatment arms. +} From 41b74977f10b298f4ccaa3dd1707d65e515c3b79 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sat, 29 Nov 2025 08:40:40 +0800 Subject: [PATCH 03/51] wip --- DESCRIPTION | 2 ++ R/assert.R | 7 +++++++ R/from_formatters.R | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2b0e7dd9..c976c2f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,10 +26,12 @@ Imports: broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), + cowplot (>= 1.2.0), checkmate (>= 2.3.2), cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), + ggplot2 (>= 4.0.1), glue (>= 1.8.0), gt (>= 0.11.1), lifecycle, diff --git a/R/assert.R b/R/assert.R index 47c8b433..8165d3ba 100644 --- a/R/assert.R +++ b/R/assert.R @@ -1,3 +1,7 @@ +# styler: off +# nocov start + + assert_proportion_value <- function(x, include_boundaries = FALSE) { checkmate::assert_number(x, lower = 0, upper = 1) checkmate::assert_flag(include_boundaries) @@ -101,3 +105,6 @@ assert_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = ) checkmate::makeAssertion(x, res, .var.name, add) } + +# nocov end +# styler: on diff --git a/R/from_formatters.R b/R/from_formatters.R index 218adba7..0bab0ab7 100644 --- a/R/from_formatters.R +++ b/R/from_formatters.R @@ -1,6 +1,6 @@ # ## Changelog -# nocov start # styler: off +# nocov start setGeneric("obj_label", function(obj) standardGeneric("obj_label")) From 9b8807a4e4a996976347bf859248c4fa0074499b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sat, 29 Nov 2025 08:42:00 +0800 Subject: [PATCH 04/51] wordlist --- inst/WORDLIST | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index b95a577a..7f7292b8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -15,11 +15,13 @@ cardx de flextable funder +ggplot gtsummary pharma pre rlang's survfit tbl +tte tidyselect unstratified From 6b51033152372648b6f1c16ee01b80bff4abe5aa Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 08:27:38 +0800 Subject: [PATCH 05/51] adding code --- NAMESPACE | 14 +- R/gkm.R | 671 +++++++++++++++++++++------------- man/annot_at_risk.Rd | 34 ++ man/annot_cox_ph.Rd | 31 ++ man/annot_surv_med.Rd | 30 ++ man/control_coxph.Rd | 28 ++ man/control_coxph_annot.Rd | 35 ++ man/control_surv_med_annot.Rd | 26 ++ man/control_surv_timepoint.Rd | 24 ++ man/df2gg.Rd | 37 ++ man/f_conf_level.Rd | 17 + man/g_km.Rd | 51 +-- man/h_data_plot.Rd | 23 ++ man/h_km_fit.Rd | 3 +- man/h_tbl_median_surv.Rd | 20 + man/h_xticks.Rd | 21 ++ man/s_coxph_pairwise.Rd | 43 +++ 17 files changed, 801 insertions(+), 307 deletions(-) create mode 100644 man/annot_at_risk.Rd create mode 100644 man/annot_cox_ph.Rd create mode 100644 man/annot_surv_med.Rd create mode 100644 man/control_coxph.Rd create mode 100644 man/control_coxph_annot.Rd create mode 100644 man/control_surv_med_annot.Rd create mode 100644 man/control_surv_timepoint.Rd create mode 100644 man/df2gg.Rd create mode 100644 man/f_conf_level.Rd create mode 100644 man/h_data_plot.Rd create mode 100644 man/h_tbl_median_surv.Rd create mode 100644 man/h_xticks.Rd create mode 100644 man/s_coxph_pairwise.Rd diff --git a/NAMESPACE b/NAMESPACE index ad4a231e..024efc1a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,9 +11,12 @@ export("obj_label<-") export(add_blank_rows) export(add_hierarchical_count_row) export(add_overall) +export(annot_at_risk) +export(annot_cox_ph) +export(annot_surv_med) export(filter_hierarchical) export(g_km) -export(h_km_fit) +export(h_data_plot) export(h_tbl_coxph_pairwise) export(label_roche_number) export(label_roche_percent) @@ -58,7 +61,9 @@ importFrom(dplyr,matches) importFrom(dplyr,num_range) importFrom(dplyr,starts_with) importFrom(dplyr,where) +importFrom(ggplot2,.pt) importFrom(ggplot2,aes) +importFrom(ggplot2,annotate) importFrom(ggplot2,coord_cartesian) importFrom(ggplot2,element_blank) importFrom(ggplot2,element_line) @@ -71,16 +76,23 @@ importFrom(ggplot2,ggplot) importFrom(ggplot2,guide_legend) importFrom(ggplot2,guides) importFrom(ggplot2,labs) +importFrom(ggplot2,margin) importFrom(ggplot2,scale_color_manual) importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,scale_linetype_manual) importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) +importFrom(ggplot2,theme_void) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(rlang,.data) importFrom(stats,as.formula) +importFrom(survival,Surv) +importFrom(survival,coxph) +importFrom(survival,survdiff) importFrom(survival,survfit) importFrom(tidyr,pivot_wider) diff --git a/R/gkm.R b/R/gkm.R index e79f3a9d..aab7d8c1 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,17 +1,35 @@ -control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { - conf_type <- match.arg(conf_type) - assert_proportion_value(conf_level) - list(conf_level = conf_level, conf_type = conf_type) -} - +#' @title Control parameters for Cox Proportional Hazards model +#' +#' @description Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) +#' analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. +#' +#' @param pval_method A character string specifying the method for calculating the p-value. +#' Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}. +#' @param ties A character string specifying the method for handling tied failure times. +#' Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}. +#' @param conf_level A numeric value between 0 and 1, specifying the confidence level. +#' +#' @return A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), ties = c("efron", "breslow", "exact"), conf_level = 0.95) { pval_method <- match.arg(pval_method) ties <- match.arg(ties) - assert_proportion_value(conf_level) + assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere list(pval_method = pval_method, ties = ties, conf_level = conf_level) } +#' @title Control parameters for Median Survival Annotation Box +#' +#' @description Creates a list of control parameters for positioning and styling the +#' median survival annotation box on a plot. +#' +#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). +#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). +#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). +#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). +#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. +#' +#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { assert_proportion_value(x) assert_proportion_value(y) @@ -21,6 +39,19 @@ control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = list(x = x, y = y, w = w, h = h, fill = fill) } +#' @title Control parameters for Cox-PH Annotation Box +#' +#' @description Creates a list of control parameters for positioning and styling the +#' Cox Proportional Hazards annotation box on a plot. +#' +#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). +#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). +#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). +#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). +#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. +#' @param ref_lbls A logical flag indicating whether to append "vs. ref group" to row names. +#' +#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { checkmate::assert_logical(ref_lbls, any.missing = FALSE) @@ -29,17 +60,34 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } -## ---------------------------------------------------------------------------- -## 3. Helper Functions (Formatting, Data Preparation, Plotting Utilities) -## ---------------------------------------------------------------------------- +## Helper Functions (Formatting, Data Preparation, Plotting Utilities) +#' @title Format Confidence Level String +#' @description Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95% CI"). +#' @param conf_level A numeric confidence level (proportion, 0 to 1). +#' @return A character string. f_conf_level <- function(conf_level) { - assert_proportion_value(conf_level) + assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere paste0(conf_level * 100, "% CI") } +#' @title Convert Data Frame to ggplot2 Table Graphic +#' +#' @description Creates a \code{ggplot2} object that renders a data frame as a table graphic. +#' +#' @param df The data frame to render. +#' @param colwidths Numeric vector of relative column widths. If \code{NULL}, determined by max character length. +#' @param font_size Numeric base font size. +#' @param col_labels Logical, whether to display column labels (header). +#' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). +#' @param hline Logical, whether to draw a horizontal line below the column labels. +#' @param bg_fill Optional color string for the plot background. +#' +#' @return A \code{ggplot2} object representing the table. +#' @importFrom ggplot2 ggplot theme_void scale_x_continuous scale_y_continuous theme element_rect annotate element_text .pt df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { + # ... (function body remains the same) df <- as.data.frame(apply(df, 1:2, function(x) { if (is.na(x)) { "NA" @@ -56,17 +104,17 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, } tot_width <- sum(colwidths) res <- ggplot2::ggplot(data = df) + - theme_void() + - scale_x_continuous(limits = c( + ggplot2::theme_void() + + ggplot2::scale_x_continuous(limits = c( 0, tot_width )) + - scale_y_continuous(limits = c(1, nrow(df))) + ggplot2::scale_y_continuous(limits = c(1, nrow(df))) if (!is.null(bg_fill)) { - res <- res + theme(plot.background = element_rect(fill = bg_fill)) + res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) } if (hline) { - res <- res + annotate("segment", + res <- res + ggplot2::annotate("segment", x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - 0.5, yend = nrow(df) - 0.5 @@ -79,7 +127,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, sum(colwidths[1:(i - 1)]) }, sum(colwidths[1:i])) - res <- res + annotate("text", + res <- res + ggplot2::annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), label = df[, i], size = font_size / .pt, fontface = if (col_labels) { c(col_lab_fontface, rep("plain", nrow(df) - 1)) @@ -91,7 +139,17 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, res } +#' @title Calculate X-axis Ticks +#' +#' @description Determines the positions for x-axis ticks based on the data and user input. +#' +#' @param data A data frame containing a \code{time} column. +#' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. +#' @param max_time Optional numeric value specifying the maximum time to consider for tick range. +#' +#' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { + # ... (function body remains the same) if (is.null(xticks)) { if (is.null(max_time)) { labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) @@ -117,7 +175,17 @@ h_xticks <- function(data, xticks = NULL, max_time = NULL) { } } +#' @title Median Survival Summary Table +#' +#' @description Extracts and formats the median survival time and its confidence interval +#' from a fitted Kaplan-Meier object. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. +#' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). +#' +#' @return A data frame with columns "N", "Median", and the confidence interval label. h_tbl_median_surv <- function(fit_km, armval = "All") { + # ... (function body remains the same) y <- if (is.null(fit_km$strata)) { as.data.frame(t(summary(fit_km)$table), row.names = armval) } else { @@ -147,21 +215,22 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @param variables A named list specifying the column names for time-to-event (\code{tte}), #' treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}. #' @param ref_group_coxph An optional string specifying the reference group for the Cox-PH model. -#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. +#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. #' @param control_coxph_pw A list of control parameters for the Cox-PH model, typically -#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. +#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. #' @param annot_coxph_ref_lbls A logical flag indicating whether to append "vs. ref group" -#' to the row names in the resulting table. +#' to the row names in the resulting table. #' #' @return A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), -#' its confidence interval, and the p-value. +#' its confidence interval, and the p-value. #' @export h_tbl_coxph_pairwise <- function(df, variables, ref_group_coxph = NULL, control_coxph_pw = control_coxph(), annot_coxph_ref_lbls = FALSE) { - assert_df_with_variables(df, variables) + # ... (function body remains the same) + assert_df_with_variables(df, variables) # Assuming assert_df_with_variables is defined elsewhere checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) checkmate::assert_flag(annot_coxph_ref_lbls) @@ -190,6 +259,7 @@ h_tbl_coxph_pairwise <- function(df, pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), stringsAsFactors = FALSE ) + # Assuming obj_label is defined elsewhere and hr_ci is the label for the CI colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) # nolint row.names(res_df) <- comp res_df @@ -199,9 +269,23 @@ h_tbl_coxph_pairwise <- function(df, do.call(rbind, results) } +#' @title Prepare Kaplan-Meier Data for Plotting +#' +#' @description Takes a fitted \code{survfit} object and processes it into a data frame +#' suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending +#' the curve to time zero. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. +#' @param armval Character string for the strata level if \code{fit_km} has no strata (e.g., "All"). +#' @param max_time Numeric, the maximum time point to include in the data, or \code{NULL} for no limit. +#' +#' @return A data frame containing the survival curve steps, confidence intervals, and censoring info. +#' @importFrom broom tidy +#' @export h_data_plot <- function(fit_km, armval = "All", max_time = NULL) { + # ... (function body remains the same) y <- broom::tidy(fit_km) if (!is.null(fit_km$strata)) { @@ -242,13 +326,31 @@ h_data_plot <- function(fit_km, } -## ---------------------------------------------------------------------------- -## 4. Core Statistical Function -## ---------------------------------------------------------------------------- +## Core Statistical Function +#' @title Pairwise Cox Proportional Hazards Model Calculation +#' +#' @description Performs a Cox Proportional Hazards model calculation comparing two groups +#' (a reference group and a comparison group). This is an internal function used by +#' \code{\link{h_tbl_coxph_pairwise}}. +#' +#' @param df Data frame for the comparison group. +#' @param .ref_group Data frame for the reference group. +#' @param .in_ref_col Logical, if \code{TRUE} returns empty results (for internal table building). +#' @param .var Character string for the time-to-event variable name. +#' @param is_event Character string for the event status variable name. +#' @param strata Optional character vector of stratification variable names. +#' @param control A list of control parameters from \code{\link{control_coxph}}. +#' @param ... Additional arguments (not used). +#' +#' @return A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), +#' and total counts. +#' @importFrom survival Surv coxph survdiff +#' @importFrom stats as.formula s_coxph_pairwise <- function(df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, control = control_coxph(), ...) { + # ... (function body remains the same) checkmate::assert_string(.var) checkmate::assert_numeric(df[[.var]]) checkmate::assert_logical(df[[is_event]]) @@ -257,6 +359,7 @@ s_coxph_pairwise <- ties <- control$ties conf_level <- control$conf_level if (.in_ref_col) { + # ... (returns empty list for reference column) return(list(pvalue = with_label( numeric(), paste0("p-value (", pval_method, ")") @@ -292,7 +395,7 @@ s_coxph_pairwise <- formula_cox <- survival::Surv(tte, is_event) ~ arm } else { formula_cox <- stats::as.formula(paste0( - "survival::Surv(tte, is_event) ~ arm + strata(", + "survival::Surv(tte, is_event) ~ arm + survival::strata(", paste(strata, collapse = ","), ")" )) df_cox <- cbind(df_cox, data[strata]) @@ -303,13 +406,14 @@ s_coxph_pairwise <- ) sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) - log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - 1) pval <- switch(pval_method, wald = sum_cox$waldtest["pvalue"], `log-rank` = log_rank_pvalue, likelihood = sum_cox$logtest["pvalue"] ) + # Assuming with_label is defined elsewhere list(pvalue = with_label(unname(pval), paste0( "p-value (", pval_method, ")" @@ -335,55 +439,15 @@ s_coxph_pairwise <- } -#' @title Kaplan-Meier Survival Curve Fitting -#' -#' @description This helper function fits a Kaplan-Meier survival curve model -#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. -#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. -#' -#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), -#' and treatment arm (\code{arm}) variables. -#' @param variables A named list specifying the column names for time-to-event (\code{tte}), -#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. -#' For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}. -#' @param control_surv A list of control parameters for the \code{survival::survfit} function, -#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level -#' and confidence interval type. -#' -#' @return An object of class \code{survfit} from the \code{survival} package, containing -#' the fitted Kaplan-Meier curves. -#' @export -h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { - tte <- variables$tte - is_event <- variables$is_event - arm <- variables$arm - - assert_valid_factor(df[[arm]]) - assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) - - formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) - fit_km <- survival::survfit( - formula = formula, - data = df, - conf.int = control_surv$conf_level, - conf.type = control_surv$conf_type - ) - return(fit_km) -} +## Core Plotting and Annotation Functions -#' @title Generate a Kaplan-Meier Plot with Annotations +#' @title Generate a Kaplan-Meier Plot #' -#' @description This function creates a comprehensive ggplot2 object for a Kaplan-Meier -#' survival curve, optionally including annotations for median survival and Cox-PH results, -#' and a 'Numbers at Risk' table below the main plot. +#' @description This function creates a comprehensive \code{ggplot2} object for a Kaplan-Meier +#' survival curve, with support for various customizations like censoring marks, CIs, and axis control. #' -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, typically -#' generated by \code{\link{h_km_fit}}. -#' @param variables A named list specifying the survival and grouping variables (needed -#' for accessing the column names, even if the fit is provided). -#' @param coxph_tbl An optional data frame containing pre-calculated Cox-PH results, -#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added. -#' @param control_surv A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}. +#' @param surv_plot_data A data frame containing the pre-processed survival data, ready for plotting. +#' This data should be equivalent to the output of \code{\link{h_data_plot}}. #' @param col A character vector of colors for the survival curves. Length should match number of arms. #' @param lty A vector of line types for the survival curves, or \code{NULL} for default. #' @param lwd Numeric value specifying line width for the survival curves. @@ -400,83 +464,50 @@ h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { #' @param footnotes Character string for plot footnotes/caption. #' @param font_size Numeric, base font size for the plot theme. #' @param ci_ribbon Logical, whether to display confidence intervals as a ribbon (area). -#' @param annot_at_risk Logical, whether to include the 'Numbers at Risk' table below the plot. -#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:" in the table. -#' @param annot_surv_med Logical, whether to include the median survival time annotation table. -#' @param control_annot_surv_med A list of control parameters for the median survival annotation box, -#' typically generated by \code{\link{control_surv_med_annot}}. -#' @param control_annot_coxph A list of control parameters for the Cox-PH annotation box, -#' typically generated by \code{\link{control_coxph_annot}}. #' @param legend_pos Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement. -#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). #' @param ggtheme An optional \code{ggplot2} theme to apply. -#' @param as_list Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object. #' -#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +#' @return A \code{ggplot2} object of the KM plot. #' @importFrom ggplot2 ggplot aes theme_bw scale_y_continuous labs theme element_text element_blank element_rect element_line geom_step geom_point scale_shape_manual guides guide_legend geom_ribbon scale_color_manual scale_fill_manual scale_linetype_manual coord_cartesian -#' @importFrom cowplot ggdraw draw_plot plot_grid -#' @importFrom tidyr pivot_wider -#' @importFrom survival survfit -#' @importFrom broom tidy -#' @importFrom stats as.formula +#' @importFrom rlang .data #' @export -g_km <- function(fit_km, - variables, - coxph_tbl = NULL, # New argument for pre-calculated Cox-PH table - control_surv = control_surv_timepoint(), - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - annot_at_risk = TRUE, - annot_at_risk_title = TRUE, - annot_surv_med = TRUE, - control_annot_surv_med = control_surv_med_annot(), - control_annot_coxph = control_coxph_annot(), - legend_pos = NULL, - rel_height_plot = 0.75, - ggtheme = NULL, - as_list = FALSE) { - # --- Data Extraction and Assertions --- - checkmate::assert_class(fit_km, "survfit") - checkmate::assert_list(variables) - checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables)) - - # 1. Extract arm values (strata names) from the fitted object - # h_data_plot is used here only to consistently get the unique strata levels - armval <- if (is.null(fit_km$strata)) "All" else levels(h_data_plot(fit_km, max_time = 1)$strata) +g_km <- function( + surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL) { + # ... (function body remains the same) + checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) + data <- surv_plot_data + + armval <- levels(data$strata) checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) - # Check if a Cox-PH table was provided (replaces annot_coxph flag) - if (!is.null(coxph_tbl)) { - checkmate::assert_data_frame(coxph_tbl) - } - - # --- Data Processing --- yval <- match.arg(yval) - data <- h_data_plot(fit_km, armval = armval, max_time = max_time) + xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) - # change estimates of survival to estimates of failure (1 - survival) if (yval == "Failure") { data[c("estimate", "conf.low", "conf.high", "censor")] <- list( 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor ) } - # derive y-axis limits if (is.null(ylim)) { if (!is.null(max_time)) { y_lwr <- min(data[data$time < max_time, ][["estimate"]]) @@ -488,46 +519,43 @@ g_km <- function(fit_km, ylim <- c(y_lwr, y_upr) } - # --- ggplot Initialization and Aesthetics (Unchanged) --- gg_plt <- ggplot2::ggplot( data = data, - mapping = aes( + mapping = ggplot2::aes( x = .data[["time"]], y = .data[["estimate"]], ymin = .data[["conf.low"]], ymax = .data[["conf.high"]], color = .data[["strata"]], fill = .data[["strata"]] ) ) + - theme_bw(base_size = font_size) + - scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + - labs(title = title, x = xlab, y = ylab, caption = footnotes) + - theme( - axis.text = element_text(size = font_size), axis.title = element_text(size = font_size), - legend.title = element_blank(), legend.text = element_text(size = font_size), - legend.box.background = element_rect(fill = "white", linewidth = 0.5), - legend.background = element_blank(), legend.position = "inside", - legend.spacing.y = unit(-0.02, "npc"), panel.grid.major = element_blank(), - panel.grid.minor = element_blank() + ggplot2::theme_bw(base_size = font_size) + + ggplot2::scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + + ggplot2::labs(title = title, x = xlab, y = ylab, caption = footnotes) + + ggplot2::theme( + axis.text = ggplot2::element_text(size = font_size), axis.title = ggplot2::element_text(size = font_size), + legend.title = ggplot2::element_blank(), legend.text = ggplot2::element_text(size = font_size), + legend.box.background = ggplot2::element_rect(fill = "white", linewidth = 0.5), + legend.background = ggplot2::element_blank(), legend.position = "inside", + legend.spacing.y = ggplot2::unit(-0.02, "npc"), panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank() ) - # derive x-axis limits if (!is.null(max_time) && !is.null(xticks)) { - gg_plt <- gg_plt + scale_x_continuous( + gg_plt <- gg_plt + ggplot2::scale_x_continuous( breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) ) } else if (!is.null(xticks)) { if (max(data$time) <= max(xticks)) { - gg_plt <- gg_plt + scale_x_continuous( + gg_plt <- gg_plt + ggplot2::scale_x_continuous( breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) ) } else { - gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) + gg_plt <- gg_plt + ggplot2::scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) } } else if (!is.null(max_time)) { - gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) + gg_plt <- gg_plt + ggplot2::scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) } - # set legend position (unchanged logic) if (!is.null(legend_pos)) { - gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) + gg_plt <- gg_plt + ggplot2::theme(legend.position.inside = legend_pos) } else { max_time2 <- sort( data$time, @@ -539,154 +567,275 @@ g_km <- function(fit_km, if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint gg_plt <- gg_plt + - theme( + ggplot2::theme( legend.position.inside = c(1, 0.5), legend.justification = c(1.1, 0.6) ) } else { gg_plt <- gg_plt + - theme( + ggplot2::theme( legend.position.inside = c(1, 0), legend.justification = c(1.1, -0.4) ) } } - # add lines, censor marks, ci ribbon, and colors (unchanged) gg_plt <- if (is.null(lty)) { - gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) + gg_plt + ggplot2::geom_step(linewidth = lwd, na.rm = TRUE) } else if (length(lty) == 1) { - gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) + gg_plt + ggplot2::geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) } else { gg_plt + - geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + - scale_linetype_manual(values = lty) + ggplot2::geom_step(ggplot2::aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + + ggplot2::scale_linetype_manual(values = lty) } if (censor_show) { - gg_plt <- gg_plt + geom_point( + gg_plt <- gg_plt + ggplot2::geom_point( data = data[data$n.censor != 0, ], - aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), + ggplot2::aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), size = size, na.rm = TRUE ) + - scale_shape_manual(name = NULL, values = pch) + - guides(fill = guide_legend(override.aes = list(shape = NA))) + ggplot2::scale_shape_manual(name = NULL, values = pch) + + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA))) } - if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) + if (ci_ribbon) gg_plt <- gg_plt + ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) if (!is.null(col)) { gg_plt <- gg_plt + - scale_color_manual(values = col) + - scale_fill_manual(values = col) + ggplot2::scale_color_manual(values = col) + + ggplot2::scale_fill_manual(values = col) } if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme - # --- Annotation Tables --- - - # 2. Median survival time annotation table - if (annot_surv_med) { - surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) - bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] - - gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + - theme( - axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), - plot.margin = margin(0, 2, 0, 5) - ) + - coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) - gg_surv_med <- suppressMessages( - gg_surv_med + - scale_x_continuous(expand = c(0.025, 0)) + - scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + + gg_plt +} + +#' @title Annotate Kaplan-Meier Plot with Median Survival Table +#' +#' @description Adds a median survival time summary table as an annotation box on a +#' Kaplan-Meier plot using \code{cowplot}. +#' +#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. +#' @param control_annot_surv_med A list of control parameters for the annotation box, +#' typically generated by \code{\link{control_surv_med_annot}}. +#' @param font_size Numeric, base font size for the annotation table. +#' +#' @return A \code{cowplot} object with the median survival table annotation added. +#' @importFrom cowplot ggdraw draw_plot +#' @importFrom ggplot2 theme element_text coord_cartesian scale_x_continuous scale_y_continuous margin +#' @export +annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv_med_annot(), font_size = 10) { + # Determine armval for h_tbl_median_surv, assuming it's available in the calling environment or logic should be updated + # For now, keeping as is, but this typically requires armval or inferring it from fit_km + armval <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for armval + + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) + bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], + width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], + vjust = 0.5, hjust = 0.5 ) + gg_plt +} - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], - width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], - vjust = 0.5, hjust = 0.5 - ) - } +#' @title Annotate Kaplan-Meier Plot with Cox-PH Table +#' +#' @description Adds a Cox Proportional Hazards summary table as an annotation box on a +#' Kaplan-Meier plot using \code{cowplot}. +#' +#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. +#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results, +#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. +#' @param control_annot_coxph A list of control parameters for the annotation box, +#' typically generated by \code{\link{control_coxph_annot}}. +#' @param font_size Numeric, base font size for the annotation table. +#' +#' @return A \code{cowplot} object with the Cox-PH table annotation added. +#' @importFrom cowplot ggdraw draw_plot +#' @importFrom ggplot2 theme element_text coord_cartesian scale_x_continuous scale_y_continuous margin +#' @export +annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { + # ... (function body remains the same) + bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] + + gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) + gg_coxph <- suppressMessages( + gg_coxph + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + ) - # 3. Cox-PH annotation table - if (!is.null(coxph_tbl)) { - # coxph_tbl is pre-computed outside g_km, just plot it - bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] - - gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + - theme( - axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), - plot.margin = margin(0, 2, 0, 5) - ) + - coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) - gg_coxph <- suppressMessages( - gg_coxph + - scale_x_continuous(expand = c(0.025, 0)) + - scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], + width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], + vjust = 0.5, hjust = 0.5 ) + gg_plt +} - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], - width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], - vjust = 0.5, hjust = 0.5 - ) +#' @title Annotate Plot with Numbers at Risk Table +#' +#' @description Adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. +#' +#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. +#' @param font_size Numeric, base font size for the table. +#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:". +#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). +#' @param xlab Character string for the x-axis label on the 'at-risk' table (typically time). +#' +#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +#' @importFrom broom tidy +#' @importFrom tidyr pivot_wider +#' @importFrom cowplot plot_grid +#' @importFrom ggplot2 labs theme_bw theme element_text element_blank element_line coord_cartesian scale_x_continuous scale_y_continuous +#' @export +annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = TRUE, rel_height_plot = 0.75, xlab = "Days") { + # ... (function body remains the same) + data <- broom::tidy(fit_km) + xticks <- h_xticks(data = data) + annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) + + # Placeholder for armval, should be retrieved from fit_km or passed as argument + armval <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) + + annot_tbl <- if (is.null(fit_km$strata)) { + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = armval + ) + } else { + strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") + levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = annot_tbl$strata + ) } - # add at risk annotation table (unchanged logic) - if (annot_at_risk) { - annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) - annot_tbl <- if (is.null(fit_km$strata)) { - data.frame( - n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = armval - ) - } else { - strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") - levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] - data.frame( - n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = annot_tbl$strata - ) - } - - at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) - at_risk_tbl[is.na(at_risk_tbl)] <- 0 - rownames(at_risk_tbl) <- levels(annot_tbl$strata) + at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) + at_risk_tbl[is.na(at_risk_tbl)] <- 0 + rownames(at_risk_tbl) <- levels(annot_tbl$strata) - gg_at_risk <- df2gg( - at_risk_tbl, - font_size = font_size, col_labels = FALSE, hline = FALSE, - colwidths = rep(1, ncol(at_risk_tbl)) + gg_at_risk <- df2gg( + at_risk_tbl, + font_size = font_size, col_labels = FALSE, hline = FALSE, + colwidths = rep(1, ncol(at_risk_tbl)) + ) + + ggplot2::labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + + ggplot2::theme_bw(base_size = font_size) + + ggplot2::theme( + plot.title = ggplot2::element_text(size = font_size, vjust = 3, face = "bold"), + panel.border = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), + axis.title.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), + axis.text.x = ggplot2::element_text(size = font_size), + axis.line.x = ggplot2::element_line() ) + - labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + - theme_bw(base_size = font_size) + - theme( - plot.title = element_text(size = font_size, vjust = 3, face = "bold"), - panel.border = element_blank(), panel.grid = element_blank(), - axis.title.y = element_blank(), axis.ticks.y = element_blank(), - axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), - axis.text.x = element_text(size = font_size), axis.line.x = element_line() - ) + - coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) - gg_at_risk <- suppressMessages( - gg_at_risk + - scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + - scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) - ) + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) + gg_at_risk <- suppressMessages( + gg_at_risk + + ggplot2::scale_x_continuous(expand = c(0.1, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + + ggplot2::scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) + ) - if (!as_list) { - gg_plt <- cowplot::plot_grid( - gg_plt, gg_at_risk, - align = "v", axis = "tblr", ncol = 1, - rel_heights = c(rel_height_plot, 1 - rel_height_plot) - ) - } - } + gg_plt <- cowplot::plot_grid( + gg_plt, gg_at_risk, + align = "v", axis = "tblr", ncol = 1, + rel_heights = c(rel_height_plot, 1 - rel_height_plot) + ) + gg_plt +} - if (as_list) { - list(plot = gg_plt, table = gg_at_risk) - } else { - gg_plt +# styler: off +# nocov start + + + + ## Control and Internal KM Fit (from `tern` or similar) + + #' @title Control parameters for Survival Timepoint Estimation + #' + #' @description Creates a list of control parameters for \code{survival::survfit} when used + #' for timepoint estimation. + #' + #' @param conf_level A numeric value (0 to 1) for the confidence level. + #' @param conf_type A character string specifying the type of confidence interval. + #' Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}. + #' + #' @return A list with elements \code{conf_level} and \code{conf_type}. + control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { + conf_type <- match.arg(conf_type) + assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere + list(conf_level = conf_level, conf_type = conf_type) } + + +#' @title Kaplan-Meier Survival Curve Fitting +#' +#' @description This helper function fits a Kaplan-Meier survival curve model +#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. +#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. +#' +#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), +#' and treatment arm (\code{arm}) variables. +#' @param variables A named list specifying the column names for time-to-event (\code{tte}), +#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. +#' @param control_surv A list of control parameters for the \code{survival::survfit} function, +#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level +#' and confidence interval type. +#' +#' @return An object of class \code{survfit} from the \code{survival} package, containing +#' the fitted Kaplan-Meier curves. +#' @importFrom survival survfit Surv +#' @importFrom stats as.formula +h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { + tte <- variables$tte + is_event <- variables$is_event + arm <- variables$arm + + # Assuming assert_valid_factor and assert_df_with_variables are defined elsewhere + assert_valid_factor(df[[arm]]) + assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) + + formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) + fit_km <- survival::survfit( + formula = formula, + data = df, + conf.int = control_surv$conf_level, + conf.type = control_surv$conf_type + ) + return(fit_km) } +# nocov end +# styler: on diff --git a/man/annot_at_risk.Rd b/man/annot_at_risk.Rd new file mode 100644 index 00000000..3892dbcc --- /dev/null +++ b/man/annot_at_risk.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{annot_at_risk} +\alias{annot_at_risk} +\title{Annotate Plot with Numbers at Risk Table} +\usage{ +annot_at_risk( + gg_plt, + fit_km, + font_size = 10, + annot_at_risk_title = TRUE, + rel_height_plot = 0.75, + xlab = "Days" +) +} +\arguments{ +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} + +\item{font_size}{Numeric, base font size for the table.} + +\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} + +\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} + +\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} +} +\value{ +A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +} +\description{ +Adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. +} diff --git a/man/annot_cox_ph.Rd b/man/annot_cox_ph.Rd new file mode 100644 index 00000000..cbfce564 --- /dev/null +++ b/man/annot_cox_ph.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{annot_cox_ph} +\alias{annot_cox_ph} +\title{Annotate Kaplan-Meier Plot with Cox-PH Table} +\usage{ +annot_cox_ph( + gg_plt, + coxph_tbl, + control_annot_coxph = control_coxph_annot(), + font_size = 10 +) +} +\arguments{ +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results, +typically generated by \code{\link{h_tbl_coxph_pairwise}}.} + +\item{control_annot_coxph}{A list of control parameters for the annotation box, +typically generated by \code{\link{control_coxph_annot}}.} + +\item{font_size}{Numeric, base font size for the annotation table.} +} +\value{ +A \code{cowplot} object with the Cox-PH table annotation added. +} +\description{ +Adds a Cox Proportional Hazards summary table as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. +} diff --git a/man/annot_surv_med.Rd b/man/annot_surv_med.Rd new file mode 100644 index 00000000..0e2fa21c --- /dev/null +++ b/man/annot_surv_med.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{annot_surv_med} +\alias{annot_surv_med} +\title{Annotate Kaplan-Meier Plot with Median Survival Table} +\usage{ +annot_surv_med( + gg_plt, + fit_km, + control_annot_surv_med = control_surv_med_annot(), + font_size = 10 +) +} +\arguments{ +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} + +\item{control_annot_surv_med}{A list of control parameters for the annotation box, +typically generated by \code{\link{control_surv_med_annot}}.} + +\item{font_size}{Numeric, base font size for the annotation table.} +} +\value{ +A \code{cowplot} object with the median survival table annotation added. +} +\description{ +Adds a median survival time summary table as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. +} diff --git a/man/control_coxph.Rd b/man/control_coxph.Rd new file mode 100644 index 00000000..9b79793b --- /dev/null +++ b/man/control_coxph.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_coxph} +\alias{control_coxph} +\title{Control parameters for Cox Proportional Hazards model} +\usage{ +control_coxph( + pval_method = c("log-rank", "wald", "likelihood"), + ties = c("efron", "breslow", "exact"), + conf_level = 0.95 +) +} +\arguments{ +\item{pval_method}{A character string specifying the method for calculating the p-value. +Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}.} + +\item{ties}{A character string specifying the method for handling tied failure times. +Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}.} + +\item{conf_level}{A numeric value between 0 and 1, specifying the confidence level.} +} +\value{ +A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. +} +\description{ +Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) +analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. +} diff --git a/man/control_coxph_annot.Rd b/man/control_coxph_annot.Rd new file mode 100644 index 00000000..b5e541c2 --- /dev/null +++ b/man/control_coxph_annot.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_coxph_annot} +\alias{control_coxph_annot} +\title{Control parameters for Cox-PH Annotation Box} +\usage{ +control_coxph_annot( + x = 0.29, + y = 0.51, + w = 0.4, + h = 0.125, + fill = TRUE, + ref_lbls = FALSE +) +} +\arguments{ +\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} + +\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} + +\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} + +\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} + +\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} + +\item{ref_lbls}{A logical flag indicating whether to append "vs. ref group" to row names.} +} +\value{ +A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. +} +\description{ +Creates a list of control parameters for positioning and styling the +Cox Proportional Hazards annotation box on a plot. +} diff --git a/man/control_surv_med_annot.Rd b/man/control_surv_med_annot.Rd new file mode 100644 index 00000000..15c6e3ee --- /dev/null +++ b/man/control_surv_med_annot.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_surv_med_annot} +\alias{control_surv_med_annot} +\title{Control parameters for Median Survival Annotation Box} +\usage{ +control_surv_med_annot(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) +} +\arguments{ +\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} + +\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} + +\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} + +\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} + +\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} +} +\value{ +A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. +} +\description{ +Creates a list of control parameters for positioning and styling the +median survival annotation box on a plot. +} diff --git a/man/control_surv_timepoint.Rd b/man/control_surv_timepoint.Rd new file mode 100644 index 00000000..c2c5979b --- /dev/null +++ b/man/control_surv_timepoint.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_surv_timepoint} +\alias{control_surv_timepoint} +\title{Control parameters for Survival Timepoint Estimation} +\usage{ +control_surv_timepoint( + conf_level = 0.95, + conf_type = c("plain", "log", "log-log") +) +} +\arguments{ +\item{conf_level}{A numeric value (0 to 1) for the confidence level.} + +\item{conf_type}{A character string specifying the type of confidence interval. +Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}.} +} +\value{ +A list with elements \code{conf_level} and \code{conf_type}. +} +\description{ +Creates a list of control parameters for \code{survival::survfit} when used +for timepoint estimation. +} diff --git a/man/df2gg.Rd b/man/df2gg.Rd new file mode 100644 index 00000000..3b3eb820 --- /dev/null +++ b/man/df2gg.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{df2gg} +\alias{df2gg} +\title{Convert Data Frame to ggplot2 Table Graphic} +\usage{ +df2gg( + df, + colwidths = NULL, + font_size = 10, + col_labels = TRUE, + col_lab_fontface = "bold", + hline = TRUE, + bg_fill = NULL +) +} +\arguments{ +\item{df}{The data frame to render.} + +\item{colwidths}{Numeric vector of relative column widths. If \code{NULL}, determined by max character length.} + +\item{font_size}{Numeric base font size.} + +\item{col_labels}{Logical, whether to display column labels (header).} + +\item{col_lab_fontface}{Character string for the font face of column labels (e.g., "bold").} + +\item{hline}{Logical, whether to draw a horizontal line below the column labels.} + +\item{bg_fill}{Optional color string for the plot background.} +} +\value{ +A \code{ggplot2} object representing the table. +} +\description{ +Creates a \code{ggplot2} object that renders a data frame as a table graphic. +} diff --git a/man/f_conf_level.Rd b/man/f_conf_level.Rd new file mode 100644 index 00000000..a6653cad --- /dev/null +++ b/man/f_conf_level.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{f_conf_level} +\alias{f_conf_level} +\title{Format Confidence Level String} +\usage{ +f_conf_level(conf_level) +} +\arguments{ +\item{conf_level}{A numeric confidence level (proportion, 0 to 1).} +} +\value{ +A character string. +} +\description{ +Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95\% CI"). +} diff --git a/man/g_km.Rd b/man/g_km.Rd index 7fd3be45..f0b15d19 100644 --- a/man/g_km.Rd +++ b/man/g_km.Rd @@ -2,13 +2,10 @@ % Please edit documentation in R/gkm.R \name{g_km} \alias{g_km} -\title{Generate a Kaplan-Meier Plot with Annotations} +\title{Generate a Kaplan-Meier Plot} \usage{ g_km( - fit_km, - variables, - coxph_tbl = NULL, - control_surv = control_surv_timepoint(), + surv_plot_data, col = NULL, lty = NULL, lwd = 0.5, @@ -25,28 +22,13 @@ g_km( footnotes = NULL, font_size = 10, ci_ribbon = FALSE, - annot_at_risk = TRUE, - annot_at_risk_title = TRUE, - annot_surv_med = TRUE, - control_annot_surv_med = control_surv_med_annot(), - control_annot_coxph = control_coxph_annot(), legend_pos = NULL, - rel_height_plot = 0.75, - ggtheme = NULL, - as_list = FALSE + ggtheme = NULL ) } \arguments{ -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, typically -generated by \code{\link{h_km_fit}}.} - -\item{variables}{A named list specifying the survival and grouping variables (needed -for accessing the column names, even if the fit is provided).} - -\item{coxph_tbl}{An optional data frame containing pre-calculated Cox-PH results, -typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added.} - -\item{control_surv}{A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}.} +\item{surv_plot_data}{A data frame containing the pre-processed survival data, ready for plotting. +This data should be equivalent to the output of \code{\link{h_data_plot}}.} \item{col}{A character vector of colors for the survival curves. Length should match number of arms.} @@ -80,31 +62,14 @@ typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH \item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} -\item{annot_at_risk}{Logical, whether to include the 'Numbers at Risk' table below the plot.} - -\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:" in the table.} - -\item{annot_surv_med}{Logical, whether to include the median survival time annotation table.} - -\item{control_annot_surv_med}{A list of control parameters for the median survival annotation box, -typically generated by \code{\link{control_surv_med_annot}}.} - -\item{control_annot_coxph}{A list of control parameters for the Cox-PH annotation box, -typically generated by \code{\link{control_coxph_annot}}.} - \item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} -\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} - \item{ggtheme}{An optional \code{ggplot2} theme to apply.} - -\item{as_list}{Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object.} } \value{ -A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +A \code{ggplot2} object of the KM plot. } \description{ -This function creates a comprehensive ggplot2 object for a Kaplan-Meier -survival curve, optionally including annotations for median survival and Cox-PH results, -and a 'Numbers at Risk' table below the main plot. +This function creates a comprehensive \code{ggplot2} object for a Kaplan-Meier +survival curve, with support for various customizations like censoring marks, CIs, and axis control. } diff --git a/man/h_data_plot.Rd b/man/h_data_plot.Rd new file mode 100644 index 00000000..159bc6c8 --- /dev/null +++ b/man/h_data_plot.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_data_plot} +\alias{h_data_plot} +\title{Prepare Kaplan-Meier Data for Plotting} +\usage{ +h_data_plot(fit_km, armval = "All", max_time = NULL) +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{armval}{Character string for the strata level if \code{fit_km} has no strata (e.g., "All").} + +\item{max_time}{Numeric, the maximum time point to include in the data, or \code{NULL} for no limit.} +} +\value{ +A data frame containing the survival curve steps, confidence intervals, and censoring info. +} +\description{ +Takes a fitted \code{survfit} object and processes it into a data frame +suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending +the curve to time zero. +} diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd index bbb66746..267a4527 100644 --- a/man/h_km_fit.Rd +++ b/man/h_km_fit.Rd @@ -11,8 +11,7 @@ h_km_fit(df, variables, control_surv = control_surv_timepoint()) and treatment arm (\code{arm}) variables.} \item{variables}{A named list specifying the column names for time-to-event (\code{tte}), -event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. -For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}.} +event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}.} \item{control_surv}{A list of control parameters for the \code{survival::survfit} function, typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd new file mode 100644 index 00000000..a99e272d --- /dev/null +++ b/man/h_tbl_median_surv.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_tbl_median_surv} +\alias{h_tbl_median_surv} +\title{Median Survival Summary Table} +\usage{ +h_tbl_median_surv(fit_km, armval = "All") +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{armval}{Character string to use as the row name if \code{fit_km} has no strata (e.g., "All").} +} +\value{ +A data frame with columns "N", "Median", and the confidence interval label. +} +\description{ +Extracts and formats the median survival time and its confidence interval +from a fitted Kaplan-Meier object. +} diff --git a/man/h_xticks.Rd b/man/h_xticks.Rd new file mode 100644 index 00000000..c50e8c1f --- /dev/null +++ b/man/h_xticks.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_xticks} +\alias{h_xticks} +\title{Calculate X-axis Ticks} +\usage{ +h_xticks(data, xticks = NULL, max_time = NULL) +} +\arguments{ +\item{data}{A data frame containing a \code{time} column.} + +\item{xticks}{A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation.} + +\item{max_time}{Optional numeric value specifying the maximum time to consider for tick range.} +} +\value{ +A numeric vector of x-axis tick positions. +} +\description{ +Determines the positions for x-axis ticks based on the data and user input. +} diff --git a/man/s_coxph_pairwise.Rd b/man/s_coxph_pairwise.Rd new file mode 100644 index 00000000..3adf3125 --- /dev/null +++ b/man/s_coxph_pairwise.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{s_coxph_pairwise} +\alias{s_coxph_pairwise} +\title{Pairwise Cox Proportional Hazards Model Calculation} +\usage{ +s_coxph_pairwise( + df, + .ref_group, + .in_ref_col, + .var, + is_event, + strata = NULL, + control = control_coxph(), + ... +) +} +\arguments{ +\item{df}{Data frame for the comparison group.} + +\item{.ref_group}{Data frame for the reference group.} + +\item{.in_ref_col}{Logical, if \code{TRUE} returns empty results (for internal table building).} + +\item{.var}{Character string for the time-to-event variable name.} + +\item{is_event}{Character string for the event status variable name.} + +\item{strata}{Optional character vector of stratification variable names.} + +\item{control}{A list of control parameters from \code{\link{control_coxph}}.} + +\item{...}{Additional arguments (not used).} +} +\value{ +A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), +and total counts. +} +\description{ +Performs a Cox Proportional Hazards model calculation comparing two groups +(a reference group and a comparison group). This is an internal function used by +\code{\link{h_tbl_coxph_pairwise}}. +} From 291244823c167edb53f117ecca6884a54e461d9b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 10:44:57 +0800 Subject: [PATCH 06/51] update ad test --- .Rbuildignore | 1 + DESCRIPTION | 3 ++- NAMESPACE | 2 ++ R/crane-package.R | 3 ++- R/gkm.R | 55 +++++---------------------------------- _pkgdown.yml | 8 ++++++ inst/WORDLIST | 1 + tests/testthat/test-gkm.R | 33 +++++++++++++++++++++++ 8 files changed, 55 insertions(+), 51 deletions(-) create mode 100644 tests/testthat/test-gkm.R diff --git a/.Rbuildignore b/.Rbuildignore index ae7e12a9..4deffee4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,4 @@ ^vignettes/articles$ ^\.gitlab-ci\.yml$ LICENSE +.lintr diff --git a/DESCRIPTION b/DESCRIPTION index c976c2f9..919d91f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,8 @@ Imports: cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), - ggplot2 (>= 4.0.1), + ggplot2 (>= 4.0.0), + ggsurvfit (>= 1.1.0), glue (>= 1.8.0), gt (>= 0.11.1), lifecycle, diff --git a/NAMESPACE b/NAMESPACE index 024efc1a..4259e82a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ exportMethods("obj_label<-") exportMethods(obj_label) import(glue) import(rlang) +import(tail) +import(utils) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) diff --git a/R/crane-package.R b/R/crane-package.R index 9d4c0225..d295f062 100644 --- a/R/crane-package.R +++ b/R/crane-package.R @@ -1,6 +1,7 @@ #' @keywords internal #' @import rlang #' @import glue glue +#' @importFrom utils tail #' @importFrom dplyr across starts_with ends_with contains matches num_range #' all_of any_of everything last_col where "_PACKAGE" @@ -9,7 +10,7 @@ ## usethis namespace: end NULL -utils::globalVariables(c(".")) +utils::globalVariables(c(".", "obj")) # using pkgs to silence NOTE .silence <- function() { diff --git a/R/gkm.R b/R/gkm.R index aab7d8c1..d0301cc8 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,15 +1,3 @@ -#' @title Control parameters for Cox Proportional Hazards model -#' -#' @description Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) -#' analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. -#' -#' @param pval_method A character string specifying the method for calculating the p-value. -#' Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}. -#' @param ties A character string specifying the method for handling tied failure times. -#' Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}. -#' @param conf_level A numeric value between 0 and 1, specifying the confidence level. -#' -#' @return A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), ties = c("efron", "breslow", "exact"), conf_level = 0.95) { pval_method <- match.arg(pval_method) @@ -18,18 +6,6 @@ control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), list(pval_method = pval_method, ties = ties, conf_level = conf_level) } -#' @title Control parameters for Median Survival Annotation Box -#' -#' @description Creates a list of control parameters for positioning and styling the -#' median survival annotation box on a plot. -#' -#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). -#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). -#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). -#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). -#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. -#' -#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { assert_proportion_value(x) assert_proportion_value(y) @@ -39,19 +15,6 @@ control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = list(x = x, y = y, w = w, h = h, fill = fill) } -#' @title Control parameters for Cox-PH Annotation Box -#' -#' @description Creates a list of control parameters for positioning and styling the -#' Cox Proportional Hazards annotation box on a plot. -#' -#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). -#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). -#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). -#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). -#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. -#' @param ref_lbls A logical flag indicating whether to append "vs. ref group" to row names. -#' -#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { checkmate::assert_logical(ref_lbls, any.missing = FALSE) @@ -60,12 +23,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } -## Helper Functions (Formatting, Data Preparation, Plotting Utilities) - -#' @title Format Confidence Level String -#' @description Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95% CI"). -#' @param conf_level A numeric confidence level (proportion, 0 to 1). -#' @return A character string. f_conf_level <- function(conf_level) { assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere paste0(conf_level * 100, "% CI") @@ -82,7 +39,7 @@ f_conf_level <- function(conf_level) { #' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). #' @param hline Logical, whether to draw a horizontal line below the column labels. #' @param bg_fill Optional color string for the plot background. -#' +#' @keywords internal #' @return A \code{ggplot2} object representing the table. #' @importFrom ggplot2 ggplot theme_void scale_x_continuous scale_y_continuous theme element_rect annotate element_text .pt df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, @@ -146,7 +103,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, #' @param data A data frame containing a \code{time} column. #' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. #' @param max_time Optional numeric value specifying the maximum time to consider for tick range. -#' +#' @keywords internal #' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { # ... (function body remains the same) @@ -182,7 +139,7 @@ h_xticks <- function(data, xticks = NULL, max_time = NULL) { #' #' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. #' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). -#' +#' @keywords internal #' @return A data frame with columns "N", "Median", and the confidence interval label. h_tbl_median_surv <- function(fit_km, armval = "All") { # ... (function body remains the same) @@ -342,7 +299,7 @@ h_data_plot <- function(fit_km, #' @param strata Optional character vector of stratification variable names. #' @param control A list of control parameters from \code{\link{control_coxph}}. #' @param ... Additional arguments (not used). -#' +#' @keywords internal #' @return A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), #' and total counts. #' @importFrom survival Surv coxph survdiff @@ -792,7 +749,7 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = #' @param conf_level A numeric value (0 to 1) for the confidence level. #' @param conf_type A character string specifying the type of confidence interval. #' Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}. - #' + #' @keywords internal #' @return A list with elements \code{conf_level} and \code{conf_type}. control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { conf_type <- match.arg(conf_type) @@ -814,7 +771,7 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = #' @param control_surv A list of control parameters for the \code{survival::survfit} function, #' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level #' and confidence interval type. -#' +#' @keywords internal #' @return An object of class \code{survfit} from the \code{survival} package, containing #' the fitted Kaplan-Meier curves. #' @importFrom survival survfit Surv diff --git a/_pkgdown.yml b/_pkgdown.yml index c850f898..0494b77d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -41,3 +41,11 @@ reference: - modify_zero_recode - add_blank_rows - label_roche + - title: "g km plot" + contents: + - h_tbl_coxph_pairwise + - h_data_plot + - g_km + - annot_surv_med + - annot_cox_ph + - annot_at_risk diff --git a/inst/WORDLIST b/inst/WORDLIST index 7f7292b8..0db4adb4 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -25,3 +25,4 @@ tbl tte tidyselect unstratified +customizations diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R new file mode 100644 index 00000000..f247c6d9 --- /dev/null +++ b/tests/testthat/test-gkm.R @@ -0,0 +1,33 @@ +skip_on_cran() + +anl <- cards::ADTTE |> + dplyr::mutate(is_event = CNSR == 0) %>% + dplyr::mutate(TRTP = as.factor(TRTP)) + +variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + +test_that("test gkm() works", { + fit_kmg01 <- survfit(ggsurvfit::Surv_CNSR(AVAL, CNSR) ~ TRTP, anl) + variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + + expect_no_error(surv_plot_data <- h_data_plot(fit_kmg01)) + + expect_no_error( + suppressWarnings( + coxph_tbl <- h_tbl_coxph_pairwise( + df = anl, + variables = variables + ) + ) + ) + + expect_no_error( + plt_kmg01 <- g_km(surv_plot_data, + xlab = "Time (Days)", + ylim = c(0.9, 1) + ) %>% + annot_surv_med(fit_kmg01) %>% + annot_cox_ph(coxph_tbl) %>% + annot_at_risk(fit_kmg01) + ) +}) From 015b86e91d3d1d34d29ea3fbd0951a825080e3a9 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 10:48:12 +0800 Subject: [PATCH 07/51] update namespace --- NAMESPACE | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4259e82a..db60c2bc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,8 +45,6 @@ exportMethods("obj_label<-") exportMethods(obj_label) import(glue) import(rlang) -import(tail) -import(utils) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) @@ -98,3 +96,4 @@ importFrom(survival,coxph) importFrom(survival,survdiff) importFrom(survival,survfit) importFrom(tidyr,pivot_wider) +importFrom(utils,tail) From ec08202bea3406a0c2db39130d8587f7654e35fb Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 10:48:23 +0800 Subject: [PATCH 08/51] update doc --- man/control_coxph.Rd | 28 ---------------------------- man/control_coxph_annot.Rd | 35 ----------------------------------- man/control_surv_med_annot.Rd | 26 -------------------------- man/control_surv_timepoint.Rd | 1 + man/df2gg.Rd | 1 + man/f_conf_level.Rd | 17 ----------------- man/h_km_fit.Rd | 1 + man/h_tbl_median_surv.Rd | 1 + man/h_xticks.Rd | 1 + man/s_coxph_pairwise.Rd | 1 + 10 files changed, 6 insertions(+), 106 deletions(-) delete mode 100644 man/control_coxph.Rd delete mode 100644 man/control_coxph_annot.Rd delete mode 100644 man/control_surv_med_annot.Rd delete mode 100644 man/f_conf_level.Rd diff --git a/man/control_coxph.Rd b/man/control_coxph.Rd deleted file mode 100644 index 9b79793b..00000000 --- a/man/control_coxph.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_coxph} -\alias{control_coxph} -\title{Control parameters for Cox Proportional Hazards model} -\usage{ -control_coxph( - pval_method = c("log-rank", "wald", "likelihood"), - ties = c("efron", "breslow", "exact"), - conf_level = 0.95 -) -} -\arguments{ -\item{pval_method}{A character string specifying the method for calculating the p-value. -Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}.} - -\item{ties}{A character string specifying the method for handling tied failure times. -Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}.} - -\item{conf_level}{A numeric value between 0 and 1, specifying the confidence level.} -} -\value{ -A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. -} -\description{ -Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) -analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. -} diff --git a/man/control_coxph_annot.Rd b/man/control_coxph_annot.Rd deleted file mode 100644 index b5e541c2..00000000 --- a/man/control_coxph_annot.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_coxph_annot} -\alias{control_coxph_annot} -\title{Control parameters for Cox-PH Annotation Box} -\usage{ -control_coxph_annot( - x = 0.29, - y = 0.51, - w = 0.4, - h = 0.125, - fill = TRUE, - ref_lbls = FALSE -) -} -\arguments{ -\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} - -\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} - -\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} - -\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} - -\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} - -\item{ref_lbls}{A logical flag indicating whether to append "vs. ref group" to row names.} -} -\value{ -A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. -} -\description{ -Creates a list of control parameters for positioning and styling the -Cox Proportional Hazards annotation box on a plot. -} diff --git a/man/control_surv_med_annot.Rd b/man/control_surv_med_annot.Rd deleted file mode 100644 index 15c6e3ee..00000000 --- a/man/control_surv_med_annot.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_surv_med_annot} -\alias{control_surv_med_annot} -\title{Control parameters for Median Survival Annotation Box} -\usage{ -control_surv_med_annot(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) -} -\arguments{ -\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} - -\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} - -\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} - -\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} - -\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} -} -\value{ -A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. -} -\description{ -Creates a list of control parameters for positioning and styling the -median survival annotation box on a plot. -} diff --git a/man/control_surv_timepoint.Rd b/man/control_surv_timepoint.Rd index c2c5979b..457addf8 100644 --- a/man/control_surv_timepoint.Rd +++ b/man/control_surv_timepoint.Rd @@ -22,3 +22,4 @@ A list with elements \code{conf_level} and \code{conf_type}. Creates a list of control parameters for \code{survival::survfit} when used for timepoint estimation. } +\keyword{internal} diff --git a/man/df2gg.Rd b/man/df2gg.Rd index 3b3eb820..cc7a2be6 100644 --- a/man/df2gg.Rd +++ b/man/df2gg.Rd @@ -35,3 +35,4 @@ A \code{ggplot2} object representing the table. \description{ Creates a \code{ggplot2} object that renders a data frame as a table graphic. } +\keyword{internal} diff --git a/man/f_conf_level.Rd b/man/f_conf_level.Rd deleted file mode 100644 index a6653cad..00000000 --- a/man/f_conf_level.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{f_conf_level} -\alias{f_conf_level} -\title{Format Confidence Level String} -\usage{ -f_conf_level(conf_level) -} -\arguments{ -\item{conf_level}{A numeric confidence level (proportion, 0 to 1).} -} -\value{ -A character string. -} -\description{ -Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95\% CI"). -} diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd index 267a4527..6f99e03b 100644 --- a/man/h_km_fit.Rd +++ b/man/h_km_fit.Rd @@ -26,3 +26,4 @@ This helper function fits a Kaplan-Meier survival curve model using the formula \code{survival::Surv(tte, is_event) ~ arm}. It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. } +\keyword{internal} diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd index a99e272d..8d181629 100644 --- a/man/h_tbl_median_surv.Rd +++ b/man/h_tbl_median_surv.Rd @@ -18,3 +18,4 @@ A data frame with columns "N", "Median", and the confidence interval label. Extracts and formats the median survival time and its confidence interval from a fitted Kaplan-Meier object. } +\keyword{internal} diff --git a/man/h_xticks.Rd b/man/h_xticks.Rd index c50e8c1f..2f912508 100644 --- a/man/h_xticks.Rd +++ b/man/h_xticks.Rd @@ -19,3 +19,4 @@ A numeric vector of x-axis tick positions. \description{ Determines the positions for x-axis ticks based on the data and user input. } +\keyword{internal} diff --git a/man/s_coxph_pairwise.Rd b/man/s_coxph_pairwise.Rd index 3adf3125..39a44de0 100644 --- a/man/s_coxph_pairwise.Rd +++ b/man/s_coxph_pairwise.Rd @@ -41,3 +41,4 @@ Performs a Cox Proportional Hazards model calculation comparing two groups (a reference group and a comparison group). This is an internal function used by \code{\link{h_tbl_coxph_pairwise}}. } +\keyword{internal} From 585ee242635351d648067d87a4ba3010e0627e3d Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 09:43:09 +0800 Subject: [PATCH 09/51] rm code --- R/gkm.R | 62 --------------------------------------------------------- 1 file changed, 62 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index d0301cc8..23e623ca 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -734,65 +734,3 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = gg_plt } -# styler: off -# nocov start - - - - ## Control and Internal KM Fit (from `tern` or similar) - - #' @title Control parameters for Survival Timepoint Estimation - #' - #' @description Creates a list of control parameters for \code{survival::survfit} when used - #' for timepoint estimation. - #' - #' @param conf_level A numeric value (0 to 1) for the confidence level. - #' @param conf_type A character string specifying the type of confidence interval. - #' Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}. - #' @keywords internal - #' @return A list with elements \code{conf_level} and \code{conf_type}. - control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { - conf_type <- match.arg(conf_type) - assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - list(conf_level = conf_level, conf_type = conf_type) - } - - -#' @title Kaplan-Meier Survival Curve Fitting -#' -#' @description This helper function fits a Kaplan-Meier survival curve model -#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. -#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. -#' -#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), -#' and treatment arm (\code{arm}) variables. -#' @param variables A named list specifying the column names for time-to-event (\code{tte}), -#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. -#' @param control_surv A list of control parameters for the \code{survival::survfit} function, -#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level -#' and confidence interval type. -#' @keywords internal -#' @return An object of class \code{survfit} from the \code{survival} package, containing -#' the fitted Kaplan-Meier curves. -#' @importFrom survival survfit Surv -#' @importFrom stats as.formula -h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { - tte <- variables$tte - is_event <- variables$is_event - arm <- variables$arm - - # Assuming assert_valid_factor and assert_df_with_variables are defined elsewhere - assert_valid_factor(df[[arm]]) - assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) - - formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) - fit_km <- survival::survfit( - formula = formula, - data = df, - conf.int = control_surv$conf_level, - conf.type = control_surv$conf_type - ) - return(fit_km) -} -# nocov end -# styler: on From 4c3268f08fe41f7f019314ca198eb8fc56d9af2b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 09:49:04 +0800 Subject: [PATCH 10/51] rm doc --- NAMESPACE | 1 - man/control_surv_timepoint.Rd | 25 ------------------------- man/h_km_fit.Rd | 29 ----------------------------- 3 files changed, 55 deletions(-) delete mode 100644 man/control_surv_timepoint.Rd delete mode 100644 man/h_km_fit.Rd diff --git a/NAMESPACE b/NAMESPACE index db60c2bc..982c9a68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,6 +94,5 @@ importFrom(stats,as.formula) importFrom(survival,Surv) importFrom(survival,coxph) importFrom(survival,survdiff) -importFrom(survival,survfit) importFrom(tidyr,pivot_wider) importFrom(utils,tail) diff --git a/man/control_surv_timepoint.Rd b/man/control_surv_timepoint.Rd deleted file mode 100644 index 457addf8..00000000 --- a/man/control_surv_timepoint.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_surv_timepoint} -\alias{control_surv_timepoint} -\title{Control parameters for Survival Timepoint Estimation} -\usage{ -control_surv_timepoint( - conf_level = 0.95, - conf_type = c("plain", "log", "log-log") -) -} -\arguments{ -\item{conf_level}{A numeric value (0 to 1) for the confidence level.} - -\item{conf_type}{A character string specifying the type of confidence interval. -Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}.} -} -\value{ -A list with elements \code{conf_level} and \code{conf_type}. -} -\description{ -Creates a list of control parameters for \code{survival::survfit} when used -for timepoint estimation. -} -\keyword{internal} diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd deleted file mode 100644 index 6f99e03b..00000000 --- a/man/h_km_fit.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{h_km_fit} -\alias{h_km_fit} -\title{Kaplan-Meier Survival Curve Fitting} -\usage{ -h_km_fit(df, variables, control_surv = control_surv_timepoint()) -} -\arguments{ -\item{df}{A data frame containing time-to-event (tte), event status (\code{is_event}), -and treatment arm (\code{arm}) variables.} - -\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), -event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}.} - -\item{control_surv}{A list of control parameters for the \code{survival::survfit} function, -typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level -and confidence interval type.} -} -\value{ -An object of class \code{survfit} from the \code{survival} package, containing -the fitted Kaplan-Meier curves. -} -\description{ -This helper function fits a Kaplan-Meier survival curve model -using the formula \code{survival::Surv(tte, is_event) ~ arm}. -It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. -} -\keyword{internal} From b29d65d0c04ebfcbc73c553bc3fec914dc8e1ca0 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:04:05 +0800 Subject: [PATCH 11/51] modify code --- NAMESPACE | 7 +- R/assert.R | 110 ---------------- R/from_formatters.R | 31 ----- R/gkm.R | 250 ++++++++++-------------------------- man/get_cox_pairwise_tbl.Rd | 50 ++++++++ man/h_tbl_coxph_pairwise.Rd | 37 ------ man/obj_label-set.Rd | 14 -- man/s_coxph_pairwise.Rd | 44 ------- 8 files changed, 118 insertions(+), 425 deletions(-) delete mode 100644 R/assert.R delete mode 100644 R/from_formatters.R create mode 100644 man/get_cox_pairwise_tbl.Rd delete mode 100644 man/h_tbl_coxph_pairwise.Rd delete mode 100644 man/obj_label-set.Rd delete mode 100644 man/s_coxph_pairwise.Rd diff --git a/NAMESPACE b/NAMESPACE index 982c9a68..84d07f40 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,6 @@ S3method(add_overall,tbl_shift) S3method(add_overall,tbl_survfit_quantiles) S3method(add_overall,tbl_survfit_times) export("%>%") -export("obj_label<-") export(add_blank_rows) export(add_hierarchical_count_row) export(add_overall) @@ -16,8 +15,8 @@ export(annot_cox_ph) export(annot_surv_med) export(filter_hierarchical) export(g_km) +export(get_cox_pairwise_tbl) export(h_data_plot) -export(h_tbl_coxph_pairwise) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) @@ -41,8 +40,6 @@ export(tbl_shift) export(tbl_survfit_quantiles) export(tbl_survfit_times) export(theme_gtsummary_roche) -exportMethods("obj_label<-") -exportMethods(obj_label) import(glue) import(rlang) importFrom(broom,tidy) @@ -90,7 +87,7 @@ importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) importFrom(rlang,.data) -importFrom(stats,as.formula) +importFrom(stats,pchisq) importFrom(survival,Surv) importFrom(survival,coxph) importFrom(survival,survdiff) diff --git a/R/assert.R b/R/assert.R deleted file mode 100644 index 8165d3ba..00000000 --- a/R/assert.R +++ /dev/null @@ -1,110 +0,0 @@ -# styler: off -# nocov start - - -assert_proportion_value <- function(x, include_boundaries = FALSE) { - checkmate::assert_number(x, lower = 0, upper = 1) - checkmate::assert_flag(include_boundaries) - if (isFALSE(include_boundaries)) { - checkmate::assert_true(x > 0) - checkmate::assert_true(x < 1) - } -} - -check_list_of_variables <- function(x) { - x <- Filter(Negate(is.null), x) - res <- checkmate::check_list(x, - names = "named", min.len = 1, - any.missing = FALSE, types = "character" - ) - if (isTRUE(res)) { - res <- checkmate::check_character(unlist(x), min.chars = 1) - } - res -} - -assert_list_of_variables <- function(x, .var.name = checkmate::vname(x), add = NULL) { - if (missing(x)) { - stop(sprintf( - "argument \"%s\" is missing, with no default", - .var.name - )) - } - res <- check_list_of_variables(x) - checkmate::makeAssertion(x, res, .var.name, add) -} - -check_df_with_variables <- function(df, variables, na_level = NULL) { - checkmate::assert_data_frame(df) - assert_list_of_variables(variables) - err_flag <- all(unlist(variables) %in% colnames(df)) - checkmate::assert_flag(err_flag) - if (isFALSE(err_flag)) { - vars <- setdiff(unlist(variables), colnames(df)) - return(paste( - deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", - paste(vars, collapse = ", ") - )) - } - if (!is.null(na_level)) { - checkmate::assert_string(na_level) - res <- unlist(lapply( - as.list(df)[unlist(variables)], - function(x) any(x == na_level) - )) - if (any(res)) { - return(paste0( - deparse(substitute(df)), " contains explicit na_level (", - na_level, ") in the following columns: ", paste0(unlist(variables)[res], - collapse = ", " - ) - )) - } - } - return(TRUE) -} - -assert_df_with_variables <- function(df, variables, na_level = NULL, .var.name = checkmate::vname(df), - add = NULL) { - if (missing(df)) { - stop(sprintf( - "argument \"%s\" is missing, with no default", - .var.name - )) - } - res <- check_df_with_variables(df, variables, na_level) - checkmate::makeAssertion(df, res, .var.name, add) -} - -check_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL) { - checkmate::assert_int(min.levels, lower = 1) - res <- checkmate::check_factor(x, - min.levels = min.levels, - null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, - n.levels = n.levels - ) - if (isTRUE(res)) { - res <- checkmate::check_character(levels(x), min.chars = 1) - } - return(res) -} - -assert_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), - add = NULL) { - if (missing(x)) { - stop(sprintf( - "argument \"%s\" is missing, with no default", - .var.name - )) - } - res <- check_valid_factor( - x, min.levels, max.levels, null.ok, - any.missing, n.levels, len - ) - checkmate::makeAssertion(x, res, .var.name, add) -} - -# nocov end -# styler: on diff --git a/R/from_formatters.R b/R/from_formatters.R deleted file mode 100644 index 0bab0ab7..00000000 --- a/R/from_formatters.R +++ /dev/null @@ -1,31 +0,0 @@ -# ## Changelog -# styler: off -# nocov start - -setGeneric("obj_label", function(obj) standardGeneric("obj_label")) - -#' The new label -#' @param value character(1). The new label -#' @export -setGeneric("obj_label<-", function(obj, value) standardGeneric("obj_label<-")) - -#' @exportMethod obj_label -setMethod("obj_label", "ANY", function(obj) attr(obj, "label")) - -#' @exportMethod obj_label<- -setMethod( - "obj_label<-", "ANY", - function(obj, value) { - attr(obj, "label") <- value - obj - } -) - -with_label <- function (x, label) -{ - obj_label(x) <- label - x -} - -# nocov end -# styler: on diff --git a/R/gkm.R b/R/gkm.R index 23e623ca..57dfc77e 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,16 +1,9 @@ -control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), - ties = c("efron", "breslow", "exact"), conf_level = 0.95) { - pval_method <- match.arg(pval_method) - ties <- match.arg(ties) - assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - list(pval_method = pval_method, ties = ties, conf_level = conf_level) -} control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - assert_proportion_value(x) - assert_proportion_value(y) - assert_proportion_value(w) - assert_proportion_value(h) + # assert_proportion_value(x) + # assert_proportion_value(y) + # assert_proportion_value(w) + # assert_proportion_value(h) list(x = x, y = y, w = w, h = h, fill = fill) } @@ -23,10 +16,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } -f_conf_level <- function(conf_level) { - assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - paste0(conf_level * 100, "% CI") -} #' @title Convert Data Frame to ggplot2 Table Graphic #' @@ -163,69 +152,75 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { ) } -#' @title Pairwise Cox Proportional Hazards Model Summary Table +#' Perform Pairwise Cox Proportional Hazards Regression +#' +#' This function performs a pairwise comparison of treatment arms using the **Cox proportional hazards model** and calculates the corresponding **log-rank p-value**. Each comparison is made between a specified reference group and all other comparison groups in the dataset. +#' +#' @param model_formula A \code{\link[stats]{formula}} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}. +#' @param data A \code{\link[base]{data.frame}} containing the survival data, including time, status, and the arm variable. +#' @param arm A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable). +#' @param ref_group A character string specifying the level of the \code{arm} variable to be used as the **reference group** for all pairwise comparisons. If \code{NULL} (the default), the **first unique level** of the \code{arm} column is used as the reference group. #' -#' @description This function computes and formats the results of a pairwise Cox Proportional -#' Hazards (Cox-PH) regression analysis between different treatment arms. +#' @return A \code{\link[base]{data.frame}} with the results of the pairwise comparisons. The columns include: +#' \itemize{ +#' \item \code{arm}: The comparison arm being tested against the reference group. +#' \item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +#' \item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. +#' \item \code{pval}: The log-rank p-value for the comparison. +#' } #' -#' @param df A data frame containing the survival data. -#' @param variables A named list specifying the column names for time-to-event (\code{tte}), -#' treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}. -#' @param ref_group_coxph An optional string specifying the reference group for the Cox-PH model. -#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. -#' @param control_coxph_pw A list of control parameters for the Cox-PH model, typically -#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. -#' @param annot_coxph_ref_lbls A logical flag indicating whether to append "vs. ref group" -#' to the row names in the resulting table. +#' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. #' -#' @return A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), -#' its confidence interval, and the p-value. +#' @importFrom survival coxph Surv survdiff +#' @importFrom stats pchisq #' @export -h_tbl_coxph_pairwise <- function(df, - variables, - ref_group_coxph = NULL, - control_coxph_pw = control_coxph(), - annot_coxph_ref_lbls = FALSE) { - # ... (function body remains the same) - assert_df_with_variables(df, variables) # Assuming assert_df_with_variables is defined elsewhere - checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) - checkmate::assert_flag(annot_coxph_ref_lbls) - - arm <- variables$arm - df[[arm]] <- factor(df[[arm]]) - - ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] - comp_group <- setdiff(levels(df[[arm]]), ref_group) - - results <- Map(function(comp) { - res <- s_coxph_pairwise( - df = df[df[[arm]] == comp, , drop = FALSE], - .ref_group = df[df[[arm]] == ref_group, , drop = FALSE], - .in_ref_col = FALSE, - .var = variables$tte, - is_event = variables$is_event, - strata = variables$strata, - control = control_coxph_pw - ) - res_df <- data.frame( - hr = format(round(res$hr, 2), nsmall = 2), - hr_ci = paste0( - "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", - format(round(res$hr_ci[2], 2), nsmall = 2), ")" - ), - pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), - stringsAsFactors = FALSE +#' +#' @examples +#' \dontrun{ +#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +#' # and 'arm' is the treatment group) +#' # library(survival) +#' # data(lung) +#' # lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +#' # lung$status <- lung$status - 1 # Convert status to 0/1 +#' # lung <- na.omit(lung) +#' +#' ormula <- Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_tbl(model_formula = formula, +#' data = lung, +#' arm = "arm", +#' ref_group = "A") +#' print(results_tbl) +#' } +get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL){ + ref_group <- if (!is.null(ref_group)) ref_group else unique(data[[arm]])[1] + comp_group <- setdiff(unique(data[[arm]]), ref_group) + + ret <- c() + for (current_arm in comp_group){ + comp_df <- data[data[[arm]] %in% c(ref_group, current_arm), ] + suppressWarnings( + coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary()) + orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + 1) + current_row <- data.frame( + arm = current_arm, + hr = sprintf("%.2f", coxph_ans$conf.int[1,1]), + ci = paste0("(", + sprintf("%.2f", coxph_ans$conf.int[1,3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1,4]), + ")"), + pval = log_rank_pvalue ) - # Assuming obj_label is defined elsewhere and hr_ci is the label for the CI - colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) # nolint - row.names(res_df) <- comp - res_df - }, comp_group) - if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) - - do.call(rbind, results) + ret <- rbind(ret, current_row) + } + + return (ret) } + #' @title Prepare Kaplan-Meier Data for Plotting #' #' @description Takes a fitted \code{survfit} object and processes it into a data frame @@ -283,118 +278,6 @@ h_data_plot <- function(fit_km, } -## Core Statistical Function - -#' @title Pairwise Cox Proportional Hazards Model Calculation -#' -#' @description Performs a Cox Proportional Hazards model calculation comparing two groups -#' (a reference group and a comparison group). This is an internal function used by -#' \code{\link{h_tbl_coxph_pairwise}}. -#' -#' @param df Data frame for the comparison group. -#' @param .ref_group Data frame for the reference group. -#' @param .in_ref_col Logical, if \code{TRUE} returns empty results (for internal table building). -#' @param .var Character string for the time-to-event variable name. -#' @param is_event Character string for the event status variable name. -#' @param strata Optional character vector of stratification variable names. -#' @param control A list of control parameters from \code{\link{control_coxph}}. -#' @param ... Additional arguments (not used). -#' @keywords internal -#' @return A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), -#' and total counts. -#' @importFrom survival Surv coxph survdiff -#' @importFrom stats as.formula -s_coxph_pairwise <- - function(df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, - control = control_coxph(), ...) { - # ... (function body remains the same) - checkmate::assert_string(.var) - checkmate::assert_numeric(df[[.var]]) - checkmate::assert_logical(df[[is_event]]) - assert_df_with_variables(df, list(tte = .var, is_event = is_event)) - pval_method <- control$pval_method - ties <- control$ties - conf_level <- control$conf_level - if (.in_ref_col) { - # ... (returns empty list for reference column) - return(list(pvalue = with_label( - numeric(), - paste0("p-value (", pval_method, ")") - ), hr = with_label( - numeric(), - "Hazard Ratio" - ), hr_ci = with_label( - numeric(), - f_conf_level(conf_level) - ), hr_ci_3d = with_label( - numeric(), - paste0( - "Hazard Ratio (", f_conf_level(conf_level), - ")" - ) - ), n_tot = with_label( - numeric(), - "Total n" - ), n_tot_events = with_label( - numeric(), - "Total events" - ))) - } - data <- rbind(.ref_group, df) - group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), - levels = c("ref", "x") - ) - df_cox <- data.frame( - tte = data[[.var]], is_event = data[[is_event]], - arm = group - ) - if (is.null(strata)) { - formula_cox <- survival::Surv(tte, is_event) ~ arm - } else { - formula_cox <- stats::as.formula(paste0( - "survival::Surv(tte, is_event) ~ arm + survival::strata(", - paste(strata, collapse = ","), ")" - )) - df_cox <- cbind(df_cox, data[strata]) - } - cox_fit <- survival::coxph( - formula = formula_cox, data = df_cox, - ties = ties - ) - sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) - orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) - log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) - pval <- switch(pval_method, - wald = sum_cox$waldtest["pvalue"], - `log-rank` = log_rank_pvalue, - likelihood = sum_cox$logtest["pvalue"] - ) - # Assuming with_label is defined elsewhere - list(pvalue = with_label(unname(pval), paste0( - "p-value (", - pval_method, ")" - )), hr = with_label(sum_cox$conf.int[ - 1, - 1 - ], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[ - 1, - 3:4 - ]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[ - 1, - 1 - ], unname(sum_cox$conf.int[1, 3:4])), paste0( - "Hazard Ratio (", - f_conf_level(conf_level), ")" - )), n_tot = with_label( - sum_cox$n, - "Total n" - ), n_tot_events = with_label( - sum_cox$nevent, - "Total events" - )) - } - ## Core Plotting and Annotation Functions @@ -733,4 +616,3 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = ) gg_plt } - diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd new file mode 100644 index 00000000..2061c737 --- /dev/null +++ b/man/get_cox_pairwise_tbl.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{get_cox_pairwise_tbl} +\alias{get_cox_pairwise_tbl} +\title{Perform Pairwise Cox Proportional Hazards Regression} +\usage{ +get_cox_pairwise_tbl(model_formula, data, arm, ref_group = NULL) +} +\arguments{ +\item{model_formula}{A \code{\link[stats]{formula}} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} + +\item{data}{A \code{\link[base]{data.frame}} containing the survival data, including time, status, and the arm variable.} + +\item{arm}{A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable).} + +\item{ref_group}{A character string specifying the level of the \code{arm} variable to be used as the \strong{reference group} for all pairwise comparisons. If \code{NULL} (the default), the \strong{first unique level} of the \code{arm} column is used as the reference group.} +} +\value{ +A \code{\link[base]{data.frame}} with the results of the pairwise comparisons. The columns include: +\itemize{ +\item \code{arm}: The comparison arm being tested against the reference group. +\item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +\item \code{ci}: The 95\\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. +\item \code{pval}: The log-rank p-value for the comparison. +} +} +\description{ +This function performs a pairwise comparison of treatment arms using the \strong{Cox proportional hazards model} and calculates the corresponding \strong{log-rank p-value}. Each comparison is made between a specified reference group and all other comparison groups in the dataset. +} +\details{ +The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. +} +\examples{ +\dontrun{ +# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +# and 'arm' is the treatment group) +# library(survival) +# data(lung) +# lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +# lung$status <- lung$status - 1 # Convert status to 0/1 +# lung <- na.omit(lung) + +ormula <- Surv(time, status) ~ arm +results_tbl <- get_cox_pairwise_tbl(model_formula = formula, +data = lung, +arm = "arm", +ref_group = "A") + print(results_tbl) +} +} diff --git a/man/h_tbl_coxph_pairwise.Rd b/man/h_tbl_coxph_pairwise.Rd deleted file mode 100644 index ee4add72..00000000 --- a/man/h_tbl_coxph_pairwise.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{h_tbl_coxph_pairwise} -\alias{h_tbl_coxph_pairwise} -\title{Pairwise Cox Proportional Hazards Model Summary Table} -\usage{ -h_tbl_coxph_pairwise( - df, - variables, - ref_group_coxph = NULL, - control_coxph_pw = control_coxph(), - annot_coxph_ref_lbls = FALSE -) -} -\arguments{ -\item{df}{A data frame containing the survival data.} - -\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), -treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}.} - -\item{ref_group_coxph}{An optional string specifying the reference group for the Cox-PH model. -If \code{NULL}, the first factor level of the arm variable is used as the reference group.} - -\item{control_coxph_pw}{A list of control parameters for the Cox-PH model, typically -generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level.} - -\item{annot_coxph_ref_lbls}{A logical flag indicating whether to append "vs. ref group" -to the row names in the resulting table.} -} -\value{ -A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), -its confidence interval, and the p-value. -} -\description{ -This function computes and formats the results of a pairwise Cox Proportional -Hazards (Cox-PH) regression analysis between different treatment arms. -} diff --git a/man/obj_label-set.Rd b/man/obj_label-set.Rd deleted file mode 100644 index f566f8c6..00000000 --- a/man/obj_label-set.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/from_formatters.R -\name{obj_label<-} -\alias{obj_label<-} -\title{The new label} -\usage{ -obj_label(obj) <- value -} -\arguments{ -\item{value}{character(1). The new label} -} -\description{ -The new label -} diff --git a/man/s_coxph_pairwise.Rd b/man/s_coxph_pairwise.Rd deleted file mode 100644 index 39a44de0..00000000 --- a/man/s_coxph_pairwise.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{s_coxph_pairwise} -\alias{s_coxph_pairwise} -\title{Pairwise Cox Proportional Hazards Model Calculation} -\usage{ -s_coxph_pairwise( - df, - .ref_group, - .in_ref_col, - .var, - is_event, - strata = NULL, - control = control_coxph(), - ... -) -} -\arguments{ -\item{df}{Data frame for the comparison group.} - -\item{.ref_group}{Data frame for the reference group.} - -\item{.in_ref_col}{Logical, if \code{TRUE} returns empty results (for internal table building).} - -\item{.var}{Character string for the time-to-event variable name.} - -\item{is_event}{Character string for the event status variable name.} - -\item{strata}{Optional character vector of stratification variable names.} - -\item{control}{A list of control parameters from \code{\link{control_coxph}}.} - -\item{...}{Additional arguments (not used).} -} -\value{ -A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), -and total counts. -} -\description{ -Performs a Cox Proportional Hazards model calculation comparing two groups -(a reference group and a comparison group). This is an internal function used by -\code{\link{h_tbl_coxph_pairwise}}. -} -\keyword{internal} From 44b191ca8570220f1a8f218e0d3fd73ac737a50f Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:11:03 +0800 Subject: [PATCH 12/51] update --- R/gkm.R | 57 +++++++++++++++++++++---------------- man/get_cox_pairwise_tbl.Rd | 26 ++++++++--------- 2 files changed, 45 insertions(+), 38 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index 57dfc77e..0bbefbab 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,3 +1,7 @@ +f_conf_level <- function(conf_level) { + # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere + paste0(conf_level * 100, "% CI") +} control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { # assert_proportion_value(x) @@ -176,48 +180,51 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @export #' #' @examples -#' \dontrun{ #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) -#' # library(survival) -#' # data(lung) -#' # lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -#' # lung$status <- lung$status - 1 # Convert status to 0/1 -#' # lung <- na.omit(lung) +#' library(survival) +#' data(lung) +#' lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +#' lung$status <- lung$status - 1 # Convert status to 0/1 +#' lung <- na.omit(lung) #' -#' ormula <- Surv(time, status) ~ arm -#' results_tbl <- get_cox_pairwise_tbl(model_formula = formula, -#' data = lung, -#' arm = "arm", -#' ref_group = "A") -#' print(results_tbl) -#' } -get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL){ +#' formula <- Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_tbl( +#' model_formula = formula, +#' data = lung, +#' arm = "arm", +#' ref_group = "A" +#' ) +#' print(results_tbl) +get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { ref_group <- if (!is.null(ref_group)) ref_group else unique(data[[arm]])[1] comp_group <- setdiff(unique(data[[arm]]), ref_group) ret <- c() - for (current_arm in comp_group){ + for (current_arm in comp_group) { comp_df <- data[data[[arm]] %in% c(ref_group, current_arm), ] suppressWarnings( - coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary()) + coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() + ) orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) + 1) current_row <- data.frame( - arm = current_arm, - hr = sprintf("%.2f", coxph_ans$conf.int[1,1]), - ci = paste0("(", - sprintf("%.2f", coxph_ans$conf.int[1,3]), - ", ", - sprintf("%.2f", coxph_ans$conf.int[1,4]), - ")"), + hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), + ci = paste0( + "(", + sprintf("%.2f", coxph_ans$conf.int[1, 3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1, 4]), + ")" + ), pval = log_rank_pvalue ) + rownames(current_row) <- current_arm ret <- rbind(ret, current_row) } - return (ret) + return(ret) } diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd index 2061c737..ff355067 100644 --- a/man/get_cox_pairwise_tbl.Rd +++ b/man/get_cox_pairwise_tbl.Rd @@ -31,20 +31,20 @@ This function performs a pairwise comparison of treatment arms using the \strong The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. } \examples{ -\dontrun{ # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), # and 'arm' is the treatment group) -# library(survival) -# data(lung) -# lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -# lung$status <- lung$status - 1 # Convert status to 0/1 -# lung <- na.omit(lung) +library(survival) +data(lung) +lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +lung$status <- lung$status - 1 # Convert status to 0/1 +lung <- na.omit(lung) -ormula <- Surv(time, status) ~ arm -results_tbl <- get_cox_pairwise_tbl(model_formula = formula, -data = lung, -arm = "arm", -ref_group = "A") - print(results_tbl) -} +formula <- Surv(time, status) ~ arm +results_tbl <- get_cox_pairwise_tbl( + model_formula = formula, + data = lung, + arm = "arm", + ref_group = "A" +) +print(results_tbl) } From 770c10b4abb2349e23d2485d9b14a0e3dfabb3a1 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:13:10 +0800 Subject: [PATCH 13/51] updat epkgdown --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 0494b77d..ef2fb0c1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -43,7 +43,7 @@ reference: - label_roche - title: "g km plot" contents: - - h_tbl_coxph_pairwise + - get_cox_pairwise_tbl - h_data_plot - g_km - annot_surv_med From 9c2ef035c4098025b67e5b30576b07530a9dbd7b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:17:42 +0800 Subject: [PATCH 14/51] restyle: --- R/gkm.R | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index 0bbefbab..2ee35001 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -20,7 +20,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } - #' @title Convert Data Frame to ggplot2 Table Graphic #' #' @description Creates a \code{ggplot2} object that renders a data frame as a table graphic. @@ -285,7 +284,6 @@ h_data_plot <- function(fit_km, } - ## Core Plotting and Annotation Functions #' @title Generate a Kaplan-Meier Plot @@ -319,25 +317,26 @@ h_data_plot <- function(fit_km, #' @importFrom rlang .data #' @export g_km <- function( - surv_plot_data, - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL) { + surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL +) { # ... (function body remains the same) checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) data <- surv_plot_data From 10e6312160933b934ce34069187741eed31e4a05 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:21:31 +0800 Subject: [PATCH 15/51] update --- DESCRIPTION | 1 + R/gkm.R | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 919d91f5..c812bd4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,6 +35,7 @@ Imports: ggsurvfit (>= 1.1.0), glue (>= 1.8.0), gt (>= 0.11.1), + labeling, lifecycle, rlang (>= 1.1.5), survival (>= 3.6-4), diff --git a/R/gkm.R b/R/gkm.R index 2ee35001..4f9cb4fa 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -96,6 +96,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, #' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. #' @param max_time Optional numeric value specifying the maximum time to consider for tick range. #' @keywords internal +#' @importFrom labeling extended #' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { # ... (function body remains the same) @@ -467,8 +468,7 @@ g_km <- function( #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. #' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. -#' @param control_annot_surv_med A list of control parameters for the annotation box, -#' typically generated by \code{\link{control_surv_med_annot}}. +#' @param control_annot_surv_med A list of control parameters for the annotation box. #' @param font_size Numeric, base font size for the annotation table. #' #' @return A \code{cowplot} object with the median survival table annotation added. @@ -510,10 +510,8 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv #' Kaplan-Meier plot using \code{cowplot}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. -#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results, -#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. -#' @param control_annot_coxph A list of control parameters for the annotation box, -#' typically generated by \code{\link{control_coxph_annot}}. +#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results. +#' @param control_annot_coxph A list of control parameters for the annotation box. #' @param font_size Numeric, base font size for the annotation table. #' #' @return A \code{cowplot} object with the Cox-PH table annotation added. From 1512115ed63ab9cb157ee7443ce6e49d17e96474 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:34:10 +0800 Subject: [PATCH 16/51] update test --- tests/testthat/test-gkm.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index f247c6d9..57448e7d 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -1,22 +1,27 @@ skip_on_cran() anl <- cards::ADTTE |> - dplyr::mutate(is_event = CNSR == 0) %>% - dplyr::mutate(TRTP = as.factor(TRTP)) - -variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + dplyr::mutate(is_event = CNSR == 0) +by = "TRTP" test_that("test gkm() works", { - fit_kmg01 <- survfit(ggsurvfit::Surv_CNSR(AVAL, CNSR) ~ TRTP, anl) - variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + GROUP_SYM <- rlang::ensym(by) + model_formula <- rlang::new_formula( + lhs = rlang::expr(Surv(AVAL, is_event)), + rhs = rlang::expr(!!GROUP_SYM) + ) + + fit_kmg01 <- survival::survfit(model_formula, anl) + expect_no_error(surv_plot_data <- h_data_plot(fit_kmg01)) expect_no_error( suppressWarnings( - coxph_tbl <- h_tbl_coxph_pairwise( - df = anl, - variables = variables + coxph_tbl <- get_cox_pairwise_tbl( + model_formula, + data = anl, + arm = by ) ) ) From 1e3fa5d1d750f18c5367fe1cb35cc9546db690d1 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 16:07:59 +0800 Subject: [PATCH 17/51] update doc --- NAMESPACE | 1 + R/tbl_null_report.R | 4 +++- man/annot_cox_ph.Rd | 6 ++---- man/annot_surv_med.Rd | 3 +-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 84d07f40..3009e9eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ importFrom(ggplot2,theme_void) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(labeling,extended) importFrom(rlang,.data) importFrom(stats,pchisq) importFrom(survival,Surv) diff --git a/R/tbl_null_report.R b/R/tbl_null_report.R index d8adb081..02b7237e 100644 --- a/R/tbl_null_report.R +++ b/R/tbl_null_report.R @@ -11,7 +11,9 @@ #' @export #' @rdname tbl_null_report tbl_null_report <- function( - label = "Null Report: no observations met the reporting criteria for inclusion in this output.") { + label = + "Null Report: no observations met the reporting criteria for inclusion in this output." +) { set_cli_abort_call() # Check input label ---------------------------------------------------------- diff --git a/man/annot_cox_ph.Rd b/man/annot_cox_ph.Rd index cbfce564..f4303738 100644 --- a/man/annot_cox_ph.Rd +++ b/man/annot_cox_ph.Rd @@ -14,11 +14,9 @@ annot_cox_ph( \arguments{ \item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} -\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results, -typically generated by \code{\link{h_tbl_coxph_pairwise}}.} +\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} -\item{control_annot_coxph}{A list of control parameters for the annotation box, -typically generated by \code{\link{control_coxph_annot}}.} +\item{control_annot_coxph}{A list of control parameters for the annotation box.} \item{font_size}{Numeric, base font size for the annotation table.} } diff --git a/man/annot_surv_med.Rd b/man/annot_surv_med.Rd index 0e2fa21c..a86b706e 100644 --- a/man/annot_surv_med.Rd +++ b/man/annot_surv_med.Rd @@ -16,8 +16,7 @@ annot_surv_med( \item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} -\item{control_annot_surv_med}{A list of control parameters for the annotation box, -typically generated by \code{\link{control_surv_med_annot}}.} +\item{control_annot_surv_med}{A list of control parameters for the annotation box.} \item{font_size}{Numeric, base font size for the annotation table.} } From de896d3db37ba96ec1424d4c1b53ba76a3ab2ec4 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 16:09:45 +0800 Subject: [PATCH 18/51] not using ggsurvfit yet --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c812bd4c..482e2664 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,6 @@ Imports: dplyr (>= 1.1.4), flextable (>= 0.9.7), ggplot2 (>= 4.0.0), - ggsurvfit (>= 1.1.0), glue (>= 1.8.0), gt (>= 0.11.1), labeling, From c65cf110916524dbbbd3dff11b06ab53959c486d Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 16:11:48 +0800 Subject: [PATCH 19/51] restyle test --- tests/testthat/test-gkm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 57448e7d..a797b688 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -2,7 +2,7 @@ skip_on_cran() anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) -by = "TRTP" +by <- "TRTP" test_that("test gkm() works", { GROUP_SYM <- rlang::ensym(by) From 4364b7a9aed31a0ea475962ddc763717d72a4232 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 18:49:00 +0800 Subject: [PATCH 20/51] update assertion --- R/gkm.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index 4f9cb4fa..dc9df6e7 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -197,12 +197,14 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - ref_group <- if (!is.null(ref_group)) ref_group else unique(data[[arm]])[1] - comp_group <- setdiff(unique(data[[arm]]), ref_group) + ref_group <- if (!is.null(ref_group)) ref_group else levels(data[[arm]])[1] + comp_group <- setdiff(levels(data[[arm]]), ref_group) ret <- c() for (current_arm in comp_group) { - comp_df <- data[data[[arm]] %in% c(ref_group, current_arm), ] + subset_arm <- c(ref_group, current_arm) + assertthat::assert_that(length(subset_arm) == 2, msg = "Make sure 2 arms") + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] suppressWarnings( coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() ) From 065ca7dfd6c482c9439dfe2e6ca6642c7176ca83 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 19:35:15 +0800 Subject: [PATCH 21/51] update assertion --- NAMESPACE | 1 + R/gkm.R | 7 +++++-- tests/testthat/test-gkm.R | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3009e9eb..ffbe6a0d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,6 +88,7 @@ importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) importFrom(labeling,extended) importFrom(rlang,.data) +importFrom(rlang,ensym) importFrom(stats,pchisq) importFrom(survival,Surv) importFrom(survival,coxph) diff --git a/R/gkm.R b/R/gkm.R index dc9df6e7..652a0fe9 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -178,7 +178,7 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @importFrom survival coxph Surv survdiff #' @importFrom stats pchisq #' @export -#' +#' @importFrom rlang ensym #' @examples #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) @@ -197,7 +197,10 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - ref_group <- if (!is.null(ref_group)) ref_group else levels(data[[arm]])[1] + msg = paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") + assertthat::assert_that(is.factor(data[[arm]]), msg = msg) + ref_group <- if (!is.null(ref_group)) { + ref_group } else {levels(data[[arm]])[1]} comp_group <- setdiff(levels(data[[arm]]), ref_group) ret <- c() diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index a797b688..3a8e6fbc 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -3,6 +3,11 @@ skip_on_cran() anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) by <- "TRTP" +anl[[by]] = factor(anl[[by]], levels = c( + "Placebo", + "Xanomeline Low Dose", + "Xanomeline High Dose" +)) test_that("test gkm() works", { GROUP_SYM <- rlang::ensym(by) From 889c21a6415b4b01098740f7720443719d946d9e Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 19:57:53 +0800 Subject: [PATCH 22/51] update test --- tests/testthat/test-gkm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 3a8e6fbc..3ff407fb 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -10,7 +10,7 @@ anl[[by]] = factor(anl[[by]], levels = c( )) test_that("test gkm() works", { - GROUP_SYM <- rlang::ensym(by) + GROUP_SYM <- rlang::sym(by) model_formula <- rlang::new_formula( lhs = rlang::expr(Surv(AVAL, is_event)), rhs = rlang::expr(!!GROUP_SYM) From afebed3eb4304e8c494061ca016a6d4591b2dd4c Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 20:36:49 +0800 Subject: [PATCH 23/51] update --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/gkm.R | 18 +++++++++++------- man/get_cox_pairwise_tbl.Rd | 10 +++++----- tests/testthat/test-gkm.R | 2 +- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 482e2664..4de481a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,8 @@ BugReports: https://github.com/insightsengineering/crane/issues Depends: gtsummary (>= 2.4.0.9009), R (>= 4.2) -Imports: +Imports: + assertthat (>= 0.2.1), broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), diff --git a/NAMESPACE b/NAMESPACE index ffbe6a0d..c7c28f2a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(tbl_survfit_times) export(theme_gtsummary_roche) import(glue) import(rlang) +importFrom(asserthat,assert_that) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) diff --git a/R/gkm.R b/R/gkm.R index 652a0fe9..e5fb67f5 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -179,28 +179,32 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @importFrom stats pchisq #' @export #' @importFrom rlang ensym +#' @importFrom assertthat assert_that #' @examples #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) #' library(survival) -#' data(lung) -#' lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -#' lung$status <- lung$status - 1 # Convert status to 0/1 -#' lung <- na.omit(lung) +#' use_lung <- lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) #' #' formula <- Surv(time, status) ~ arm #' results_tbl <- get_cox_pairwise_tbl( #' model_formula = formula, -#' data = lung, +#' data = use_lung, #' arm = "arm", #' ref_group = "A" #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - msg = paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") + msg <- paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") assertthat::assert_that(is.factor(data[[arm]]), msg = msg) ref_group <- if (!is.null(ref_group)) { - ref_group } else {levels(data[[arm]])[1]} + ref_group + } else { + levels(data[[arm]])[1] + } comp_group <- setdiff(levels(data[[arm]]), ref_group) ret <- c() diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd index ff355067..7ebe55ab 100644 --- a/man/get_cox_pairwise_tbl.Rd +++ b/man/get_cox_pairwise_tbl.Rd @@ -34,15 +34,15 @@ The function iterates through each unique arm (excluding the reference group), f # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), # and 'arm' is the treatment group) library(survival) -data(lung) -lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -lung$status <- lung$status - 1 # Convert status to 0/1 -lung <- na.omit(lung) +use_lung <- lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) formula <- Surv(time, status) ~ arm results_tbl <- get_cox_pairwise_tbl( model_formula = formula, - data = lung, + data = use_lung, arm = "arm", ref_group = "A" ) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 3ff407fb..94671a23 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -3,7 +3,7 @@ skip_on_cran() anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) by <- "TRTP" -anl[[by]] = factor(anl[[by]], levels = c( +anl[[by]] <- factor(anl[[by]], levels = c( "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose" From 8f52316875bd6c3ff9322966f7b760736ba334a5 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 20:40:53 +0800 Subject: [PATCH 24/51] update --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index c7c28f2a..7d04acf9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,7 +42,7 @@ export(tbl_survfit_times) export(theme_gtsummary_roche) import(glue) import(rlang) -importFrom(asserthat,assert_that) +importFrom(assertthat,assert_that) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) From d5a79b79658e9d06fb3fd87ac5f378a1ad86f517 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 22:33:24 +0800 Subject: [PATCH 25/51] manual vbump --- DESCRIPTION | 2 +- NEWS.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4de481a7..422c6878 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: crane Title: Supplements the 'gtsummary' Package for Pharmaceutical Reporting -Version: 0.2.0.9014 +Version: 0.2.0.9015 Authors@R: c( person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0862-2018")), diff --git a/NEWS.md b/NEWS.md index 17a76e1d..a5922ec3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# crane 0.2.0.9014 +# crane 0.2.0.9015 + +* Added `g_km()` function for creating Kaplan-Meier plots. * Added `list("assign_summary_type-arg:cat_threshold" = 0L)` to `theme_gtsummary_roche()`. Numeric variables with few levels will no longer default to summary type `'categorical'` in `gtsummary::tbl_summary()` and `tbl_roche_summary()`. (#79) From fcafc3e731511b75e73147c4ebb283f382ca4363 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Wed, 3 Dec 2025 09:48:11 +0800 Subject: [PATCH 26/51] rm assertion --- R/gkm.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index e5fb67f5..78b94173 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -4,10 +4,6 @@ f_conf_level <- function(conf_level) { } control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - # assert_proportion_value(x) - # assert_proportion_value(y) - # assert_proportion_value(w) - # assert_proportion_value(h) list(x = x, y = y, w = w, h = h, fill = fill) } From 7dd95f02dd832f13dd9cc3ef0e2cfc5046674f54 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Wed, 3 Dec 2025 10:22:42 +0800 Subject: [PATCH 27/51] reorg code --- NAMESPACE | 27 --------------------------- R/gkm.R | 35 ----------------------------------- R/utils_plot.R | 17 +++++++++++++++++ 3 files changed, 17 insertions(+), 62 deletions(-) create mode 100644 R/utils_plot.R diff --git a/NAMESPACE b/NAMESPACE index bcda15b2..b5b646ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,38 +62,11 @@ importFrom(dplyr,matches) importFrom(dplyr,num_range) importFrom(dplyr,starts_with) importFrom(dplyr,where) -importFrom(ggplot2,.pt) -importFrom(ggplot2,aes) -importFrom(ggplot2,annotate) -importFrom(ggplot2,coord_cartesian) -importFrom(ggplot2,element_blank) -importFrom(ggplot2,element_line) -importFrom(ggplot2,element_rect) -importFrom(ggplot2,element_text) -importFrom(ggplot2,geom_point) -importFrom(ggplot2,geom_ribbon) -importFrom(ggplot2,geom_step) -importFrom(ggplot2,ggplot) -importFrom(ggplot2,guide_legend) -importFrom(ggplot2,guides) -importFrom(ggplot2,labs) -importFrom(ggplot2,margin) -importFrom(ggplot2,scale_color_manual) -importFrom(ggplot2,scale_fill_manual) -importFrom(ggplot2,scale_linetype_manual) -importFrom(ggplot2,scale_shape_manual) -importFrom(ggplot2,scale_x_continuous) -importFrom(ggplot2,scale_y_continuous) -importFrom(ggplot2,theme) -importFrom(ggplot2,theme_bw) -importFrom(ggplot2,theme_void) importFrom(gtsummary,add_difference_row) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) importFrom(labeling,extended) -importFrom(rlang,.data) -importFrom(rlang,ensym) importFrom(stats,pchisq) importFrom(survival,Surv) importFrom(survival,coxph) diff --git a/R/gkm.R b/R/gkm.R index 78b94173..0856270f 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,21 +1,3 @@ -f_conf_level <- function(conf_level) { - # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - paste0(conf_level * 100, "% CI") -} - -control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - - list(x = x, y = y, w = w, h = h, fill = fill) -} - -control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { - checkmate::assert_logical(ref_lbls, any.missing = FALSE) - - res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) - res -} - - #' @title Convert Data Frame to ggplot2 Table Graphic #' #' @description Creates a \code{ggplot2} object that renders a data frame as a table graphic. @@ -29,7 +11,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T #' @param bg_fill Optional color string for the plot background. #' @keywords internal #' @return A \code{ggplot2} object representing the table. -#' @importFrom ggplot2 ggplot theme_void scale_x_continuous scale_y_continuous theme element_rect annotate element_text .pt df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { # ... (function body remains the same) @@ -92,7 +73,6 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, #' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. #' @param max_time Optional numeric value specifying the maximum time to consider for tick range. #' @keywords internal -#' @importFrom labeling extended #' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { # ... (function body remains the same) @@ -171,11 +151,7 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' #' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. #' -#' @importFrom survival coxph Surv survdiff -#' @importFrom stats pchisq #' @export -#' @importFrom rlang ensym -#' @importFrom assertthat assert_that #' @examples #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) @@ -244,7 +220,6 @@ get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { #' @param max_time Numeric, the maximum time point to include in the data, or \code{NULL} for no limit. #' #' @return A data frame containing the survival curve steps, confidence intervals, and censoring info. -#' @importFrom broom tidy #' @export h_data_plot <- function(fit_km, armval = "All", @@ -319,8 +294,6 @@ h_data_plot <- function(fit_km, #' @param ggtheme An optional \code{ggplot2} theme to apply. #' #' @return A \code{ggplot2} object of the KM plot. -#' @importFrom ggplot2 ggplot aes theme_bw scale_y_continuous labs theme element_text element_blank element_rect element_line geom_step geom_point scale_shape_manual guides guide_legend geom_ribbon scale_color_manual scale_fill_manual scale_linetype_manual coord_cartesian -#' @importFrom rlang .data #' @export g_km <- function( surv_plot_data, @@ -477,8 +450,6 @@ g_km <- function( #' @param font_size Numeric, base font size for the annotation table. #' #' @return A \code{cowplot} object with the median survival table annotation added. -#' @importFrom cowplot ggdraw draw_plot -#' @importFrom ggplot2 theme element_text coord_cartesian scale_x_continuous scale_y_continuous margin #' @export annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv_med_annot(), font_size = 10) { # Determine armval for h_tbl_median_surv, assuming it's available in the calling environment or logic should be updated @@ -520,8 +491,6 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv #' @param font_size Numeric, base font size for the annotation table. #' #' @return A \code{cowplot} object with the Cox-PH table annotation added. -#' @importFrom cowplot ggdraw draw_plot -#' @importFrom ggplot2 theme element_text coord_cartesian scale_x_continuous scale_y_continuous margin #' @export annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { # ... (function body remains the same) @@ -560,10 +529,6 @@ annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_ #' @param xlab Character string for the x-axis label on the 'at-risk' table (typically time). #' #' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. -#' @importFrom broom tidy -#' @importFrom tidyr pivot_wider -#' @importFrom cowplot plot_grid -#' @importFrom ggplot2 labs theme_bw theme element_text element_blank element_line coord_cartesian scale_x_continuous scale_y_continuous #' @export annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = TRUE, rel_height_plot = 0.75, xlab = "Days") { # ... (function body remains the same) diff --git a/R/utils_plot.R b/R/utils_plot.R new file mode 100644 index 00000000..297af894 --- /dev/null +++ b/R/utils_plot.R @@ -0,0 +1,17 @@ +f_conf_level <- function(conf_level) { + # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere + paste0(conf_level * 100, "% CI") +} + +control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { + + list(x = x, y = y, w = w, h = h, fill = fill) +} + +control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { + checkmate::assert_logical(ref_lbls, any.missing = FALSE) + + res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) + res +} + From 5dfacdd98122a060167c957b0f843741090d2d2e Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Wed, 3 Dec 2025 11:54:35 +0800 Subject: [PATCH 28/51] adding example --- R/gkm.R | 79 +++++++++++++------- R/utils_plot.R | 2 - man/annot_at_risk.Rd | 34 --------- man/annot_cox_ph.Rd | 29 -------- man/annot_surv_med.Rd | 29 -------- man/g_km.Rd | 75 ------------------- man/gkm.Rd | 169 ++++++++++++++++++++++++++++++++++++++++++ man/h_data_plot.Rd | 23 ------ 8 files changed, 221 insertions(+), 219 deletions(-) delete mode 100644 man/annot_at_risk.Rd delete mode 100644 man/annot_cox_ph.Rd delete mode 100644 man/annot_surv_med.Rd delete mode 100644 man/g_km.Rd create mode 100644 man/gkm.Rd delete mode 100644 man/h_data_plot.Rd diff --git a/R/gkm.R b/R/gkm.R index 0856270f..bdf46b23 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -150,7 +150,6 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' } #' #' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. -#' #' @export #' @examples #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), @@ -170,6 +169,8 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { + msg <- paste0(rlang::ensym(model_formula), " is not a formula") + assertthat::assert_that(rlang::is_formula(model_formula), msg = msg) msg <- paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") assertthat::assert_that(is.factor(data[[arm]]), msg = msg) ref_group <- if (!is.null(ref_group)) { @@ -209,9 +210,9 @@ get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { } -#' @title Prepare Kaplan-Meier Data for Plotting +#' @title Generate a Kaplan-Meier Plot #' -#' @description Takes a fitted \code{survfit} object and processes it into a data frame +#' @description The function \code{h_data_plot} takes a fitted \code{survfit} object and processes it into a data frame #' suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending #' the curve to time zero. #' @@ -219,7 +220,21 @@ get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { #' @param armval Character string for the strata level if \code{fit_km} has no strata (e.g., "All"). #' @param max_time Numeric, the maximum time point to include in the data, or \code{NULL} for no limit. #' -#' @return A data frame containing the survival curve steps, confidence intervals, and censoring info. +#' @return The function \code{h_data_plot} returns a data frame containing the survival curve steps, confidence intervals, and censoring info. +#' @rdname gkm +#' @examples +#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +#' # and 'arm' is the treatment group) +#' library(survival) +#' use_lung <- lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) +#' formula <- Surv(time, status) ~ arm +#' fit_kmg01 <- survfit(formula, use_lung) +#' surv_plot_data <- h_data_plot(fit_kmg01) +#' head(surv_plot_data) +#' #' @export h_data_plot <- function(fit_km, armval = "All", @@ -265,15 +280,11 @@ h_data_plot <- function(fit_km, } -## Core Plotting and Annotation Functions - -#' @title Generate a Kaplan-Meier Plot -#' -#' @description This function creates a comprehensive \code{ggplot2} object for a Kaplan-Meier +#' @description The function \code{g_km} creates a comprehensive \code{ggplot2} object for a Kaplan-Meier #' survival curve, with support for various customizations like censoring marks, CIs, and axis control. #' #' @param surv_plot_data A data frame containing the pre-processed survival data, ready for plotting. -#' This data should be equivalent to the output of \code{\link{h_data_plot}}. +#' This data should be equivalent to the output of \code{h_data_plot}. #' @param col A character vector of colors for the survival curves. Length should match number of arms. #' @param lty A vector of line types for the survival curves, or \code{NULL} for default. #' @param lwd Numeric value specifying line width for the survival curves. @@ -293,7 +304,14 @@ h_data_plot <- function(fit_km, #' @param legend_pos Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement. #' @param ggtheme An optional \code{ggplot2} theme to apply. #' -#' @return A \code{ggplot2} object of the KM plot. +#' @return The function \code{g_km} returns a \code{ggplot2} object of the KM plot. +#' @rdname gkm +#' @examples +#' # Example of making the KM plot +#' plt_kmg01 <- g_km(surv_plot_data, +#' xlab = "Time (Days)" +#' ) +#' #' @export g_km <- function( surv_plot_data, @@ -316,7 +334,6 @@ g_km <- function( legend_pos = NULL, ggtheme = NULL ) { - # ... (function body remains the same) checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) data <- surv_plot_data @@ -435,13 +452,10 @@ g_km <- function( } if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme - gg_plt } -#' @title Annotate Kaplan-Meier Plot with Median Survival Table -#' -#' @description Adds a median survival time summary table as an annotation box on a +#' @description The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a #' Kaplan-Meier plot using \code{cowplot}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. @@ -449,7 +463,12 @@ g_km <- function( #' @param control_annot_surv_med A list of control parameters for the annotation box. #' @param font_size Numeric, base font size for the annotation table. #' -#' @return A \code{cowplot} object with the median survival table annotation added. +#' @return The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. +#' @rdname gkm +#' @examples +#' # Annotate Kaplan-Meier Plot with Median Survival Table +#' annot_surv_med(plt_kmg01, fit_kmg01) +#' #' @export annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv_med_annot(), font_size = 10) { # Determine armval for h_tbl_median_surv, assuming it's available in the calling environment or logic should be updated @@ -480,9 +499,7 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv gg_plt } -#' @title Annotate Kaplan-Meier Plot with Cox-PH Table -#' -#' @description Adds a Cox Proportional Hazards summary table as an annotation box on a +#' @description The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a #' Kaplan-Meier plot using \code{cowplot}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. @@ -490,8 +507,14 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv #' @param control_annot_coxph A list of control parameters for the annotation box. #' @param font_size Numeric, base font size for the annotation table. #' -#' @return A \code{cowplot} object with the Cox-PH table annotation added. +#' @return The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. +#' @rdname gkm #' @export +#' @examples +#' # Annotate Kaplan-Meier Plot with Cox-PH Table +#' coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") +#' annot_surv_med(plt_kmg01, coxph_tbl) +#' annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { # ... (function body remains the same) bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] @@ -517,9 +540,8 @@ annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_ gg_plt } -#' @title Annotate Plot with Numbers at Risk Table -#' -#' @description Adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. + +#' @description The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. #' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. @@ -527,11 +549,14 @@ annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_ #' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:". #' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). #' @param xlab Character string for the x-axis label on the 'at-risk' table (typically time). -#' -#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +#' @rdname gkm +#' @return The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. #' @export +#' @examples +#' # Annotate Plot with Numbers at Risk Table +#' annot_at_risk(plt_kmg01, fit_kmg01) +#' annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = TRUE, rel_height_plot = 0.75, xlab = "Days") { - # ... (function body remains the same) data <- broom::tidy(fit_km) xticks <- h_xticks(data = data) annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) diff --git a/R/utils_plot.R b/R/utils_plot.R index 297af894..89457842 100644 --- a/R/utils_plot.R +++ b/R/utils_plot.R @@ -4,7 +4,6 @@ f_conf_level <- function(conf_level) { } control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - list(x = x, y = y, w = w, h = h, fill = fill) } @@ -14,4 +13,3 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) res } - diff --git a/man/annot_at_risk.Rd b/man/annot_at_risk.Rd deleted file mode 100644 index 3892dbcc..00000000 --- a/man/annot_at_risk.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{annot_at_risk} -\alias{annot_at_risk} -\title{Annotate Plot with Numbers at Risk Table} -\usage{ -annot_at_risk( - gg_plt, - fit_km, - font_size = 10, - annot_at_risk_title = TRUE, - rel_height_plot = 0.75, - xlab = "Days" -) -} -\arguments{ -\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} - -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} - -\item{font_size}{Numeric, base font size for the table.} - -\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} - -\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} - -\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} -} -\value{ -A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. -} -\description{ -Adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. -} diff --git a/man/annot_cox_ph.Rd b/man/annot_cox_ph.Rd deleted file mode 100644 index f4303738..00000000 --- a/man/annot_cox_ph.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{annot_cox_ph} -\alias{annot_cox_ph} -\title{Annotate Kaplan-Meier Plot with Cox-PH Table} -\usage{ -annot_cox_ph( - gg_plt, - coxph_tbl, - control_annot_coxph = control_coxph_annot(), - font_size = 10 -) -} -\arguments{ -\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} - -\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} - -\item{control_annot_coxph}{A list of control parameters for the annotation box.} - -\item{font_size}{Numeric, base font size for the annotation table.} -} -\value{ -A \code{cowplot} object with the Cox-PH table annotation added. -} -\description{ -Adds a Cox Proportional Hazards summary table as an annotation box on a -Kaplan-Meier plot using \code{cowplot}. -} diff --git a/man/annot_surv_med.Rd b/man/annot_surv_med.Rd deleted file mode 100644 index a86b706e..00000000 --- a/man/annot_surv_med.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{annot_surv_med} -\alias{annot_surv_med} -\title{Annotate Kaplan-Meier Plot with Median Survival Table} -\usage{ -annot_surv_med( - gg_plt, - fit_km, - control_annot_surv_med = control_surv_med_annot(), - font_size = 10 -) -} -\arguments{ -\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} - -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} - -\item{control_annot_surv_med}{A list of control parameters for the annotation box.} - -\item{font_size}{Numeric, base font size for the annotation table.} -} -\value{ -A \code{cowplot} object with the median survival table annotation added. -} -\description{ -Adds a median survival time summary table as an annotation box on a -Kaplan-Meier plot using \code{cowplot}. -} diff --git a/man/g_km.Rd b/man/g_km.Rd deleted file mode 100644 index f0b15d19..00000000 --- a/man/g_km.Rd +++ /dev/null @@ -1,75 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{g_km} -\alias{g_km} -\title{Generate a Kaplan-Meier Plot} -\usage{ -g_km( - surv_plot_data, - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL -) -} -\arguments{ -\item{surv_plot_data}{A data frame containing the pre-processed survival data, ready for plotting. -This data should be equivalent to the output of \code{\link{h_data_plot}}.} - -\item{col}{A character vector of colors for the survival curves. Length should match number of arms.} - -\item{lty}{A vector of line types for the survival curves, or \code{NULL} for default.} - -\item{lwd}{Numeric value specifying line width for the survival curves.} - -\item{censor_show}{Logical, whether to display censoring marks on the plot.} - -\item{pch}{Plotting character for censoring marks.} - -\item{size}{Size of the censoring marks.} - -\item{max_time}{Numeric, the maximum time point to display on the x-axis.} - -\item{xticks}{Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto.} - -\item{xlab}{Character string for the x-axis label.} - -\item{yval}{Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability.} - -\item{ylab}{Character string for the y-axis label.} - -\item{ylim}{Numeric vector of length 2 for y-axis limits.} - -\item{title}{Character string for the plot title.} - -\item{footnotes}{Character string for plot footnotes/caption.} - -\item{font_size}{Numeric, base font size for the plot theme.} - -\item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} - -\item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} - -\item{ggtheme}{An optional \code{ggplot2} theme to apply.} -} -\value{ -A \code{ggplot2} object of the KM plot. -} -\description{ -This function creates a comprehensive \code{ggplot2} object for a Kaplan-Meier -survival curve, with support for various customizations like censoring marks, CIs, and axis control. -} diff --git a/man/gkm.Rd b/man/gkm.Rd new file mode 100644 index 00000000..12f56614 --- /dev/null +++ b/man/gkm.Rd @@ -0,0 +1,169 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_data_plot} +\alias{h_data_plot} +\alias{g_km} +\alias{annot_surv_med} +\alias{annot_cox_ph} +\alias{annot_at_risk} +\title{Generate a Kaplan-Meier Plot} +\usage{ +h_data_plot(fit_km, armval = "All", max_time = NULL) + +g_km( + surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL +) + +annot_surv_med( + gg_plt, + fit_km, + control_annot_surv_med = control_surv_med_annot(), + font_size = 10 +) + +annot_cox_ph( + gg_plt, + coxph_tbl, + control_annot_coxph = control_coxph_annot(), + font_size = 10 +) + +annot_at_risk( + gg_plt, + fit_km, + font_size = 10, + annot_at_risk_title = TRUE, + rel_height_plot = 0.75, + xlab = "Days" +) +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} + +\item{armval}{Character string for the strata level if \code{fit_km} has no strata (e.g., "All").} + +\item{max_time}{Numeric, the maximum time point to display on the x-axis.} + +\item{surv_plot_data}{A data frame containing the pre-processed survival data, ready for plotting. +This data should be equivalent to the output of \code{h_data_plot}.} + +\item{col}{A character vector of colors for the survival curves. Length should match number of arms.} + +\item{lty}{A vector of line types for the survival curves, or \code{NULL} for default.} + +\item{lwd}{Numeric value specifying line width for the survival curves.} + +\item{censor_show}{Logical, whether to display censoring marks on the plot.} + +\item{pch}{Plotting character for censoring marks.} + +\item{size}{Size of the censoring marks.} + +\item{xticks}{Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto.} + +\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} + +\item{yval}{Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability.} + +\item{ylab}{Character string for the y-axis label.} + +\item{ylim}{Numeric vector of length 2 for y-axis limits.} + +\item{title}{Character string for the plot title.} + +\item{footnotes}{Character string for plot footnotes/caption.} + +\item{font_size}{Numeric, base font size for the table.} + +\item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} + +\item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} + +\item{ggtheme}{An optional \code{ggplot2} theme to apply.} + +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{control_annot_surv_med}{A list of control parameters for the annotation box.} + +\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} + +\item{control_annot_coxph}{A list of control parameters for the annotation box.} + +\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} + +\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} +} +\value{ +The function \code{h_data_plot} returns a data frame containing the survival curve steps, confidence intervals, and censoring info. + +The function \code{g_km} returns a \code{ggplot2} object of the KM plot. + +The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. + +The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. + +The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +} +\description{ +The function \code{h_data_plot} takes a fitted \code{survfit} object and processes it into a data frame +suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending +the curve to time zero. + +The function \code{g_km} creates a comprehensive \code{ggplot2} object for a Kaplan-Meier +survival curve, with support for various customizations like censoring marks, CIs, and axis control. + +The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. + +The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. + +The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. +} +\examples{ +# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +# and 'arm' is the treatment group) +library(survival) +use_lung <- lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) +formula <- Surv(time, status) ~ arm +fit_kmg01 <- survfit(formula, use_lung) +surv_plot_data <- h_data_plot(fit_kmg01) +head(surv_plot_data) + +# Example of making the KM plot +plt_kmg01 <- g_km(surv_plot_data, + xlab = "Time (Days)" +) + +# Annotate Kaplan-Meier Plot with Median Survival Table +annot_surv_med(plt_kmg01, fit_kmg01) + +# Annotate Kaplan-Meier Plot with Cox-PH Table +coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") +annot_surv_med(plt_kmg01, coxph_tbl) + +Annotate Plot with Numbers at Risk Table +annot_at_risk(plt_kmg01, fit_kmg01) + +} diff --git a/man/h_data_plot.Rd b/man/h_data_plot.Rd deleted file mode 100644 index 159bc6c8..00000000 --- a/man/h_data_plot.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{h_data_plot} -\alias{h_data_plot} -\title{Prepare Kaplan-Meier Data for Plotting} -\usage{ -h_data_plot(fit_km, armval = "All", max_time = NULL) -} -\arguments{ -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} - -\item{armval}{Character string for the strata level if \code{fit_km} has no strata (e.g., "All").} - -\item{max_time}{Numeric, the maximum time point to include in the data, or \code{NULL} for no limit.} -} -\value{ -A data frame containing the survival curve steps, confidence intervals, and censoring info. -} -\description{ -Takes a fitted \code{survfit} object and processes it into a data frame -suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending -the curve to time zero. -} From 6f0f0488669fc4e5e8fc53e67c2de2cae26184ff Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Wed, 3 Dec 2025 11:58:17 +0800 Subject: [PATCH 29/51] updatedoc --- man/gkm.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/gkm.Rd b/man/gkm.Rd index 12f56614..416bb13f 100644 --- a/man/gkm.Rd +++ b/man/gkm.Rd @@ -163,7 +163,7 @@ annot_surv_med(plt_kmg01, fit_kmg01) coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") annot_surv_med(plt_kmg01, coxph_tbl) -Annotate Plot with Numbers at Risk Table +# Annotate Plot with Numbers at Risk Table annot_at_risk(plt_kmg01, fit_kmg01) } From 1d63f4bdb6784c7ff161d31811f992612a908f21 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Wed, 3 Dec 2025 12:11:37 +0800 Subject: [PATCH 30/51] fix eg --- R/gkm.R | 2 +- man/gkm.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index bdf46b23..94670e04 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -513,7 +513,7 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv #' @examples #' # Annotate Kaplan-Meier Plot with Cox-PH Table #' coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") -#' annot_surv_med(plt_kmg01, coxph_tbl) +#' annot_cox_ph(plt_kmg01, coxph_tbl) #' annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { # ... (function body remains the same) diff --git a/man/gkm.Rd b/man/gkm.Rd index 416bb13f..3739dd77 100644 --- a/man/gkm.Rd +++ b/man/gkm.Rd @@ -161,7 +161,7 @@ annot_surv_med(plt_kmg01, fit_kmg01) # Annotate Kaplan-Meier Plot with Cox-PH Table coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") -annot_surv_med(plt_kmg01, coxph_tbl) +annot_cox_ph(plt_kmg01, coxph_tbl) # Annotate Plot with Numbers at Risk Table annot_at_risk(plt_kmg01, fit_kmg01) From 67f9bcfd2d3cbd3bfdb8af0b1c968cad2ee4a72e Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 4 Dec 2025 16:49:01 +0100 Subject: [PATCH 31/51] changes --- NAMESPACE | 4 +- NEWS.md | 2 +- R/{gkm.R => gg_km.R} | 395 +++++++++++------------------------- R/gg_km_utils.R | 233 +++++++++++++++++++++ R/utils_plot.R | 15 -- _pkgdown.yml | 4 +- man/df2gg.Rd | 2 +- man/get_cox_pairwise_tbl.Rd | 9 +- man/gg_km.Rd | 198 ++++++++++++++++++ man/gkm.Rd | 169 --------------- man/h_tbl_median_surv.Rd | 2 +- man/h_xticks.Rd | 7 +- tests/testthat/test-gkm.R | 4 +- 13 files changed, 565 insertions(+), 479 deletions(-) rename R/{gkm.R => gg_km.R} (50%) create mode 100644 R/gg_km_utils.R delete mode 100644 R/utils_plot.R create mode 100644 man/gg_km.Rd delete mode 100644 man/gkm.Rd diff --git a/NAMESPACE b/NAMESPACE index b5b646ff..48592fa2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,15 +16,15 @@ export(annot_at_risk) export(annot_cox_ph) export(annot_surv_med) export(filter_hierarchical) -export(g_km) export(get_cox_pairwise_tbl) -export(h_data_plot) +export(gg_km) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) export(label_roche_ratio) export(modify_header_rm_md) export(modify_zero_recode) +export(process_survfit_data) export(remove_duplicate_keys) export(sort_hierarchical) export(style_roche_number) diff --git a/NEWS.md b/NEWS.md index a5922ec3..6d64186c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # crane 0.2.0.9015 -* Added `g_km()` function for creating Kaplan-Meier plots. +* Added `gg_km()` function for creating Kaplan-Meier plots. * Added `list("assign_summary_type-arg:cat_threshold" = 0L)` to `theme_gtsummary_roche()`. Numeric variables with few levels will no longer default to summary type `'categorical'` in `gtsummary::tbl_summary()` and `tbl_roche_summary()`. (#79) diff --git a/R/gkm.R b/R/gg_km.R similarity index 50% rename from R/gkm.R rename to R/gg_km.R index 94670e04..5700334e 100644 --- a/R/gkm.R +++ b/R/gg_km.R @@ -1,247 +1,63 @@ -#' @title Convert Data Frame to ggplot2 Table Graphic +#' Kaplan-Meier Plot with ggplot2 #' -#' @description Creates a \code{ggplot2} object that renders a data frame as a table graphic. +#' @description +#' The function `process_survfit_data` #' -#' @param df The data frame to render. -#' @param colwidths Numeric vector of relative column widths. If \code{NULL}, determined by max character length. -#' @param font_size Numeric base font size. -#' @param col_labels Logical, whether to display column labels (header). -#' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). -#' @param hline Logical, whether to draw a horizontal line below the column labels. -#' @param bg_fill Optional color string for the plot background. -#' @keywords internal -#' @return A \code{ggplot2} object representing the table. -df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, - col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { - # ... (function body remains the same) - df <- as.data.frame(apply(df, 1:2, function(x) { - if (is.na(x)) { - "NA" - } else { - as.character(x) - } - })) - if (col_labels) { - df <- as.matrix(df) - df <- rbind(colnames(df), df) - } - if (is.null(colwidths)) { - colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) - } - tot_width <- sum(colwidths) - res <- ggplot2::ggplot(data = df) + - ggplot2::theme_void() + - ggplot2::scale_x_continuous(limits = c( - 0, - tot_width - )) + - ggplot2::scale_y_continuous(limits = c(1, nrow(df))) - if (!is.null(bg_fill)) { - res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) - } - if (hline) { - res <- res + ggplot2::annotate("segment", - x = 0 + 0.2 * colwidths[2], - xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - - 0.5, yend = nrow(df) - 0.5 - ) - } - for (i in seq_len(ncol(df))) { - line_pos <- c(if (i == 1) { - 0 - } else { - sum(colwidths[1:(i - - 1)]) - }, sum(colwidths[1:i])) - res <- res + ggplot2::annotate("text", - x = mean(line_pos), y = rev(seq_len(nrow(df))), - label = df[, i], size = font_size / .pt, fontface = if (col_labels) { - c(col_lab_fontface, rep("plain", nrow(df) - 1)) - } else { - rep("plain", nrow(df)) - } - ) - } - res -} +#' @name gg_km +NULL -#' @title Calculate X-axis Ticks +#' @describeIn gg_km takes a fitted [survfit] object and processes it into a data frame +#' suitable for plotting a Kaplan-Meier curve with `ggplot2`. Time zero is also added to the data. #' -#' @description Determines the positions for x-axis ticks based on the data and user input. +#' @param fit_km A fitted Kaplan-Meier object of class `survfit`. +#' @param armval (`string`)\cr +#' A single character string used as the strata level if the input `fit_km` object +#' has no strata (e.g., `"All"`). +#' @param max_time (`numeric` or `NULL`)\cr +#' A single numeric value defining the **maximum time point** to include in the data, +#' or `NULL` for no time limit. #' -#' @param data A data frame containing a \code{time} column. -#' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. -#' @param max_time Optional numeric value specifying the maximum time to consider for tick range. -#' @keywords internal -#' @return A numeric vector of x-axis tick positions. -h_xticks <- function(data, xticks = NULL, max_time = NULL) { - # ... (function body remains the same) - if (is.null(xticks)) { - if (is.null(max_time)) { - labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) - } else { - labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) - } - } else if (checkmate::test_number(xticks)) { - if (is.null(max_time)) { - seq(0, max(data$time), xticks) - } else { - seq(0, max(data$time, max_time), xticks) - } - } else if (is.numeric(xticks)) { - xticks - } else { - stop( - paste( - "xticks should be either `NULL`", - "or a single number (interval between x ticks)", - "or a numeric vector (position of ticks on the x axis)" - ) - ) - } -} - -#' @title Median Survival Summary Table +#' @return The function `process_survfit_data` returns a data frame containing the survival +#' curve steps, confidence intervals, and censoring info. #' -#' @description Extracts and formats the median survival time and its confidence interval -#' from a fitted Kaplan-Meier object. +#' @details +#' Data setup assumes `"time"` is event time, `"status"` is event indicator (`1` represents an event), +#' while `"arm"` is the treatment group. #' -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. -#' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). -#' @keywords internal -#' @return A data frame with columns "N", "Median", and the confidence interval label. -h_tbl_median_surv <- function(fit_km, armval = "All") { - # ... (function body remains the same) - y <- if (is.null(fit_km$strata)) { - as.data.frame(t(summary(fit_km)$table), row.names = armval) - } else { - tbl <- summary(fit_km)$table - rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") - rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] - as.data.frame(tbl) - } - conf.int <- summary(fit_km)$conf.int # nolint - y$records <- round(y$records) - y$median <- signif(y$median, 4) - y$`CI` <- paste0( - "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" - ) - stats::setNames( - y[c("records", "median", "CI")], - c("N", "Median", f_conf_level(conf.int)) - ) -} - -#' Perform Pairwise Cox Proportional Hazards Regression -#' -#' This function performs a pairwise comparison of treatment arms using the **Cox proportional hazards model** and calculates the corresponding **log-rank p-value**. Each comparison is made between a specified reference group and all other comparison groups in the dataset. -#' -#' @param model_formula A \code{\link[stats]{formula}} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}. -#' @param data A \code{\link[base]{data.frame}} containing the survival data, including time, status, and the arm variable. -#' @param arm A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable). -#' @param ref_group A character string specifying the level of the \code{arm} variable to be used as the **reference group** for all pairwise comparisons. If \code{NULL} (the default), the **first unique level** of the \code{arm} column is used as the reference group. -#' -#' @return A \code{\link[base]{data.frame}} with the results of the pairwise comparisons. The columns include: -#' \itemize{ -#' \item \code{arm}: The comparison arm being tested against the reference group. -#' \item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. -#' \item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. -#' \item \code{pval}: The log-rank p-value for the comparison. -#' } -#' -#' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. -#' @export #' @examples -#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), -#' # and 'arm' is the treatment group) -#' library(survival) +#' # Data preparation for KM plot #' use_lung <- lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' -#' formula <- Surv(time, status) ~ arm -#' results_tbl <- get_cox_pairwise_tbl( -#' model_formula = formula, -#' data = use_lung, -#' arm = "arm", -#' ref_group = "A" -#' ) -#' print(results_tbl) -get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - msg <- paste0(rlang::ensym(model_formula), " is not a formula") - assertthat::assert_that(rlang::is_formula(model_formula), msg = msg) - msg <- paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") - assertthat::assert_that(is.factor(data[[arm]]), msg = msg) - ref_group <- if (!is.null(ref_group)) { - ref_group - } else { - levels(data[[arm]])[1] - } - comp_group <- setdiff(levels(data[[arm]]), ref_group) - - ret <- c() - for (current_arm in comp_group) { - subset_arm <- c(ref_group, current_arm) - assertthat::assert_that(length(subset_arm) == 2, msg = "Make sure 2 arms") - comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] - suppressWarnings( - coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() - ) - orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) - log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) - current_row <- data.frame( - hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), - ci = paste0( - "(", - sprintf("%.2f", coxph_ans$conf.int[1, 3]), - ", ", - sprintf("%.2f", coxph_ans$conf.int[1, 4]), - ")" - ), - pval = log_rank_pvalue - ) - rownames(current_row) <- current_arm - ret <- rbind(ret, current_row) - } - - return(ret) -} - - -#' @title Generate a Kaplan-Meier Plot -#' -#' @description The function \code{h_data_plot} takes a fitted \code{survfit} object and processes it into a data frame -#' suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending -#' the curve to time zero. -#' -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. -#' @param armval Character string for the strata level if \code{fit_km} has no strata (e.g., "All"). -#' @param max_time Numeric, the maximum time point to include in the data, or \code{NULL} for no limit. -#' -#' @return The function \code{h_data_plot} returns a data frame containing the survival curve steps, confidence intervals, and censoring info. -#' @rdname gkm -#' @examples -#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), -#' # and 'arm' is the treatment group) -#' library(survival) -#' use_lung <- lung -#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) -#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 -#' use_lung <- na.omit(use_lung) +#' # Fit Kaplan-Meier model #' formula <- Surv(time, status) ~ arm #' fit_kmg01 <- survfit(formula, use_lung) -#' surv_plot_data <- h_data_plot(fit_kmg01) +#' +#' # Process survfit data for plotting +#' surv_plot_data <- process_survfit_data(fit_kmg01) #' head(surv_plot_data) #' #' @export -h_data_plot <- function(fit_km, - armval = "All", - max_time = NULL) { - # ... (function body remains the same) +process_survfit_data <- function(fit_km, + armval = "All", + max_time = NULL) { + set_cli_abort_call() + + # Input checks + if (!inherits(fit_km, "survfit")) { + cli::cli_abort( + "The input {.arg fit_km} must be a fitted Kaplan-Meier object of class {.cls survfit}.", + call = get_cli_abort_call() + ) + } + check_string(armval) + check_numeric(max_time, allow_empty = TRUE) + y <- broom::tidy(fit_km) + # Handle strata factor levels if (!is.null(fit_km$strata)) { fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) @@ -254,6 +70,7 @@ h_data_plot <- function(fit_km, y$strata <- armval } + # Extend to time zero y_by_strata <- split(y, y$strata) y_by_strata_extended <- lapply( y_by_strata, @@ -272,68 +89,89 @@ h_data_plot <- function(fit_km, ) y <- do.call(rbind, y_by_strata_extended) + # Censoring points y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) + + # Apply max_time filter if provided if (!is.null(max_time)) { y <- y[y$time <= max(max_time), ] } + y } -#' @description The function \code{g_km} creates a comprehensive \code{ggplot2} object for a Kaplan-Meier -#' survival curve, with support for various customizations like censoring marks, CIs, and axis control. +#' @describeIn gg_km creates a Kaplan-Meier survival curve, with +#' support for various customizations like censoring marks, Confidence Intervals (CIs), and axis control. #' -#' @param surv_plot_data A data frame containing the pre-processed survival data, ready for plotting. -#' This data should be equivalent to the output of \code{h_data_plot}. -#' @param col A character vector of colors for the survival curves. Length should match number of arms. -#' @param lty A vector of line types for the survival curves, or \code{NULL} for default. -#' @param lwd Numeric value specifying line width for the survival curves. -#' @param censor_show Logical, whether to display censoring marks on the plot. -#' @param pch Plotting character for censoring marks. -#' @param size Size of the censoring marks. -#' @param max_time Numeric, the maximum time point to display on the x-axis. -#' @param xticks Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto. -#' @param xlab Character string for the x-axis label. -#' @param yval Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability. -#' @param ylab Character string for the y-axis label. -#' @param ylim Numeric vector of length 2 for y-axis limits. -#' @param title Character string for the plot title. -#' @param footnotes Character string for plot footnotes/caption. -#' @param font_size Numeric, base font size for the plot theme. -#' @param ci_ribbon Logical, whether to display confidence intervals as a ribbon (area). -#' @param legend_pos Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement. -#' @param ggtheme An optional \code{ggplot2} theme to apply. +#' @param surv_plot_data (`data.frame`)\cr +#' A data frame containing the pre-processed survival data, ready for plotting. +#' This data should be equivalent to the output of `process_survfit_data`. +#' @param col (`character`)\cr +#' A character vector of colors (e.g., color names or hexadecimal codes) for the survival curves. +#' The **length must match the number of arms/groups** being plotted. +#' @param lty (`numeric` or `NULL`)\cr +#' A numeric vector of **line types** (e.g., `1` for solid, `2` for dashed) for the survival curves, or `NULL` for `ggplot2` defaults. +#' The length should match the number of arms/groups. +#' @param lwd (`numeric`)\cr +#' A single numeric value specifying the **line width** for the survival curves. +#' @param censor_show (`logical`)\cr +#' A single logical value indicating whether to display **censoring marks** on the plot. Defaults to `TRUE`. +#' @param pch (`numeric`)\cr +#' A single numeric value specifying the **plotting character** (point shape code) for censoring marks. +#' @param size (`numeric`)\cr +#' A single numeric value specifying the **size** of the censoring marks. +#' @param max_time (`numeric`)\cr +#' A single numeric value defining the **maximum time point** to display on the x-axis. Data points beyond this time will be clipped. +#' @param xticks (`numeric` or `NULL`)\cr +#' A numeric vector of explicit **x-axis tick positions**, or a single numeric value representing the **interval** between ticks, or `NULL` for automatic `ggplot2` scaling. +#' @param xlab (`character`)\cr +#' A single character string for the **x-axis label**. +#' @param yval (`character`)\cr +#' A single character string, either `"Survival"` or `"Failure"` to plot the corresponding probability. Case sensitive. +#' @param ylab (`character`)\cr +#' A single character string for the **y-axis label**. +#' @param ylim (`numeric`)\cr +#' A **numeric vector of length 2** defining the lower and upper limits of the y-axis (e.g., `c(0, 1)`). +#' @param title (`character`)\cr +#' A single character string for the **plot title**. +#' @param footnotes (`character`)\cr +#' A single character string for plot **footnotes or caption**. +#' @param font_size (`numeric`)\cr +#' A single numeric value specifying the **base font size** for the plot theme elements. +#' @param ci_ribbon (`logical`)\cr +#' A single logical value indicating whether to display **confidence intervals** as a ribbon (shaded area) around the survival curve. Defaults to `TRUE`. +#' @param legend_pos (`numeric` or `NULL`)\cr +#' A **numeric vector of length 2** defining the **legend position** as (x, y) coordinates relative to the plot area (ranging from 0 to 1), or `NULL` for external, automatic placement. +#' +#' @return The function `gg_km` returns a `ggplot2` object of the KM plot. #' -#' @return The function \code{g_km} returns a \code{ggplot2} object of the KM plot. -#' @rdname gkm #' @examples #' # Example of making the KM plot -#' plt_kmg01 <- g_km(surv_plot_data, +#' plt_kmg01 <- gg_km(surv_plot_data, #' xlab = "Time (Days)" #' ) #' #' @export -g_km <- function( - surv_plot_data, - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL -) { +gg_km <- function(surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL) { checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) data <- surv_plot_data @@ -450,12 +288,11 @@ g_km <- function( ggplot2::scale_color_manual(values = col) + ggplot2::scale_fill_manual(values = col) } - if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme gg_plt } -#' @description The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a +#' @describeIn gg_km The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a #' Kaplan-Meier plot using \code{cowplot}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. @@ -464,7 +301,7 @@ g_km <- function( #' @param font_size Numeric, base font size for the annotation table. #' #' @return The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. -#' @rdname gkm +#' #' @examples #' # Annotate Kaplan-Meier Plot with Median Survival Table #' annot_surv_med(plt_kmg01, fit_kmg01) @@ -499,7 +336,7 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv gg_plt } -#' @description The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a +#' @describeIn gg_km The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a #' Kaplan-Meier plot using \code{cowplot}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. @@ -508,13 +345,13 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv #' @param font_size Numeric, base font size for the annotation table. #' #' @return The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. -#' @rdname gkm -#' @export +#' #' @examples #' # Annotate Kaplan-Meier Plot with Cox-PH Table #' coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") #' annot_cox_ph(plt_kmg01, coxph_tbl) #' +#' @export annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { # ... (function body remains the same) bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] @@ -541,7 +378,7 @@ annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_ } -#' @description The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. +#' @describeIn gg_km The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. #' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. @@ -549,13 +386,13 @@ annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_ #' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:". #' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). #' @param xlab Character string for the x-axis label on the 'at-risk' table (typically time). -#' @rdname gkm #' @return The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. -#' @export +#' #' @examples #' # Annotate Plot with Numbers at Risk Table #' annot_at_risk(plt_kmg01, fit_kmg01) #' +#' @export annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = TRUE, rel_height_plot = 0.75, xlab = "Days") { data <- broom::tidy(fit_km) xticks <- h_xticks(data = data) diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R new file mode 100644 index 00000000..1c64b18c --- /dev/null +++ b/R/gg_km_utils.R @@ -0,0 +1,233 @@ +f_conf_level <- function(conf_level) { + # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere + paste0(conf_level * 100, "% CI") +} + +control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { + list(x = x, y = y, w = w, h = h, fill = fill) +} + +control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { + checkmate::assert_logical(ref_lbls, any.missing = FALSE) + + res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) + res +} + +#' Convert Data Frame to ggplot2 Table Graphic +#' +#' @description Creates a `ggplot2` object that renders a data frame as a table graphic. +#' +#' @param df The data frame to render. +#' @param colwidths Numeric vector of relative column widths. If \code{NULL}, determined by max character length. +#' @param font_size Numeric base font size. +#' @param col_labels Logical, whether to display column labels (header). +#' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). +#' @param hline Logical, whether to draw a horizontal line below the column labels. +#' @param bg_fill Optional color string for the plot background. +#' +#' @return A \code{ggplot2} object representing the table. +#' +#' @keywords internal +df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { + df <- as.data.frame(apply(df, 1:2, function(x) { + if (is.na(x)) { + "NA" + } else { + as.character(x) + } + })) + if (col_labels) { + df <- as.matrix(df) + df <- rbind(colnames(df), df) + } + if (is.null(colwidths)) { + colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) + } + tot_width <- sum(colwidths) + res <- ggplot2::ggplot(data = df) + + ggplot2::theme_void() + + ggplot2::scale_x_continuous(limits = c( + 0, + tot_width + )) + + ggplot2::scale_y_continuous(limits = c(1, nrow(df))) + if (!is.null(bg_fill)) { + res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) + } + if (hline) { + res <- res + ggplot2::annotate("segment", + x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5 + ) + } + for (i in seq_len(ncol(df))) { + line_pos <- c(if (i == 1) { + 0 + } else { + sum(colwidths[1:(i - + 1)]) + }, sum(colwidths[1:i])) + res <- res + ggplot2::annotate("text", + x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size / .pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } else { + rep("plain", nrow(df)) + } + ) + } + res +} + +#' Calculate X-axis Ticks +#' +#' @description Determines the positions for x-axis ticks based on the data and user input. +#' +#' @param data A data frame containing a `"time""` column. +#' @param xticks A numeric vector of specific tick positions, a single number for the interval, or +#' `NULL` for auto-calculation. +#' @param max_time Optional numeric value specifying the maximum time to consider for tick range. +#' +#' @return A numeric vector of x-axis tick positions. +#' +#' @keywords internal +h_xticks <- function(data, xticks = NULL, max_time = NULL) { + # ... (function body remains the same) + if (is.null(xticks)) { + if (is.null(max_time)) { + labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) + } else { + labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) + } + } else if (checkmate::test_number(xticks)) { + if (is.null(max_time)) { + seq(0, max(data$time), xticks) + } else { + seq(0, max(data$time, max_time), xticks) + } + } else if (is.numeric(xticks)) { + xticks + } else { + stop( + paste( + "xticks should be either `NULL`", + "or a single number (interval between x ticks)", + "or a numeric vector (position of ticks on the x axis)" + ) + ) + } +} + +#' @title Median Survival Summary Table +#' +#' @description Extracts and formats the median survival time and its confidence interval +#' from a fitted Kaplan-Meier object. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. +#' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). +#' @keywords internal +#' @return A data frame with columns "N", "Median", and the confidence interval label. +h_tbl_median_surv <- function(fit_km, armval = "All") { + # ... (function body remains the same) + y <- if (is.null(fit_km$strata)) { + as.data.frame(t(summary(fit_km)$table), row.names = armval) + } else { + tbl <- summary(fit_km)$table + rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") + rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] + as.data.frame(tbl) + } + conf.int <- summary(fit_km)$conf.int # nolint + y$records <- round(y$records) + y$median <- signif(y$median, 4) + y$`CI` <- paste0( + "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" + ) + stats::setNames( + y[c("records", "median", "CI")], + c("N", "Median", f_conf_level(conf.int)) + ) +} + +#' Perform Pairwise Cox Proportional Hazards Regression +#' +#' This function performs a pairwise comparison of treatment arms using the **Cox proportional hazards model** and calculates the corresponding **log-rank p-value**. Each comparison is made between a specified reference group and all other comparison groups in the dataset. +#' +#' @param model_formula A [stats::formula] object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}. +#' @param data A `data.frame` containing the survival data, including time, status, and the arm variable. +#' @param arm A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable). +#' @param ref_group A character string specifying the level of the \code{arm} variable to be used as the **reference group** for all pairwise comparisons. If \code{NULL} (the default), the **first unique level** of the \code{arm} column is used as the reference group. +#' +#' @return A `data.frame` with the results of the pairwise comparisons. The columns include: +#' \itemize{ +#' \item \code{arm}: The comparison arm being tested against the reference group. +#' \item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +#' \item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. +#' \item \code{pval}: The log-rank p-value for the comparison. +#' } +#' +#' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. +#' +#' @examples +#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +#' # and 'arm' is the treatment group) +#' library(survival) +#' use_lung <- lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) +#' +#' formula <- Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_tbl( +#' model_formula = formula, +#' data = use_lung, +#' arm = "arm", +#' ref_group = "A" +#' ) +#' print(results_tbl) +#' +#' @export +get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { + msg <- paste0(rlang::ensym(model_formula), " is not a formula") + assertthat::assert_that(rlang::is_formula(model_formula), msg = msg) + msg <- paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") + assertthat::assert_that(is.factor(data[[arm]]), msg = msg) + ref_group <- if (!is.null(ref_group)) { + ref_group + } else { + levels(data[[arm]])[1] + } + comp_group <- setdiff(levels(data[[arm]]), ref_group) + + ret <- c() + for (current_arm in comp_group) { + subset_arm <- c(ref_group, current_arm) + assertthat::assert_that(length(subset_arm) == 2, msg = "Make sure 2 arms") + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] + suppressWarnings( + coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() + ) + orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + 1) + current_row <- data.frame( + hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), + ci = paste0( + "(", + sprintf("%.2f", coxph_ans$conf.int[1, 3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1, 4]), + ")" + ), + pval = log_rank_pvalue + ) + rownames(current_row) <- current_arm + ret <- rbind(ret, current_row) + } + + return(ret) +} + diff --git a/R/utils_plot.R b/R/utils_plot.R deleted file mode 100644 index 89457842..00000000 --- a/R/utils_plot.R +++ /dev/null @@ -1,15 +0,0 @@ -f_conf_level <- function(conf_level) { - # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - paste0(conf_level * 100, "% CI") -} - -control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - list(x = x, y = y, w = w, h = h, fill = fill) -} - -control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { - checkmate::assert_logical(ref_lbls, any.missing = FALSE) - - res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) - res -} diff --git a/_pkgdown.yml b/_pkgdown.yml index ef2fb0c1..94a334ea 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,8 +44,8 @@ reference: - title: "g km plot" contents: - get_cox_pairwise_tbl - - h_data_plot - - g_km + - process_survfit_data + - gg_km - annot_surv_med - annot_cox_ph - annot_at_risk diff --git a/man/df2gg.Rd b/man/df2gg.Rd index cc7a2be6..22a76862 100644 --- a/man/df2gg.Rd +++ b/man/df2gg.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R +% Please edit documentation in R/gg_km_utils.R \name{df2gg} \alias{df2gg} \title{Convert Data Frame to ggplot2 Table Graphic} diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd index 7ebe55ab..db7eb1cb 100644 --- a/man/get_cox_pairwise_tbl.Rd +++ b/man/get_cox_pairwise_tbl.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R +% Please edit documentation in R/gg_km_utils.R \name{get_cox_pairwise_tbl} \alias{get_cox_pairwise_tbl} \title{Perform Pairwise Cox Proportional Hazards Regression} @@ -7,16 +7,16 @@ get_cox_pairwise_tbl(model_formula, data, arm, ref_group = NULL) } \arguments{ -\item{model_formula}{A \code{\link[stats]{formula}} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} +\item{model_formula}{A \link[stats:formula]{stats::formula} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} -\item{data}{A \code{\link[base]{data.frame}} containing the survival data, including time, status, and the arm variable.} +\item{data}{A \code{data.frame} containing the survival data, including time, status, and the arm variable.} \item{arm}{A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable).} \item{ref_group}{A character string specifying the level of the \code{arm} variable to be used as the \strong{reference group} for all pairwise comparisons. If \code{NULL} (the default), the \strong{first unique level} of the \code{arm} column is used as the reference group.} } \value{ -A \code{\link[base]{data.frame}} with the results of the pairwise comparisons. The columns include: +A \code{data.frame} with the results of the pairwise comparisons. The columns include: \itemize{ \item \code{arm}: The comparison arm being tested against the reference group. \item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. @@ -47,4 +47,5 @@ results_tbl <- get_cox_pairwise_tbl( ref_group = "A" ) print(results_tbl) + } diff --git a/man/gg_km.Rd b/man/gg_km.Rd new file mode 100644 index 00000000..66399f65 --- /dev/null +++ b/man/gg_km.Rd @@ -0,0 +1,198 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gg_km.R +\name{gg_km} +\alias{gg_km} +\alias{process_survfit_data} +\alias{annot_surv_med} +\alias{annot_cox_ph} +\alias{annot_at_risk} +\title{Kaplan-Meier Plot with ggplot2} +\usage{ +process_survfit_data(fit_km, armval = "All", max_time = NULL) + +gg_km( + surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL +) + +annot_surv_med( + gg_plt, + fit_km, + control_annot_surv_med = control_surv_med_annot(), + font_size = 10 +) + +annot_cox_ph( + gg_plt, + coxph_tbl, + control_annot_coxph = control_coxph_annot(), + font_size = 10 +) + +annot_at_risk( + gg_plt, + fit_km, + font_size = 10, + annot_at_risk_title = TRUE, + rel_height_plot = 0.75, + xlab = "Days" +) +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} + +\item{armval}{(\code{string})\cr +A single character string used as the strata level if the input \code{fit_km} object +has no strata (e.g., \code{"All"}).} + +\item{max_time}{(\code{numeric})\cr +A single numeric value defining the \strong{maximum time point} to display on the x-axis. Data points beyond this time will be clipped.} + +\item{surv_plot_data}{(\code{data.frame})\cr +A data frame containing the pre-processed survival data, ready for plotting. +This data should be equivalent to the output of \code{process_survfit_data}.} + +\item{col}{(\code{character})\cr +A character vector of colors (e.g., color names or hexadecimal codes) for the survival curves. +The \strong{length must match the number of arms/groups} being plotted.} + +\item{lty}{(\code{numeric} or \code{NULL})\cr +A numeric vector of \strong{line types} (e.g., \code{1} for solid, \code{2} for dashed) for the survival curves, or \code{NULL} for \code{ggplot2} defaults. +The length should match the number of arms/groups.} + +\item{lwd}{(\code{numeric})\cr +A single numeric value specifying the \strong{line width} for the survival curves.} + +\item{censor_show}{(\code{logical})\cr +A single logical value indicating whether to display \strong{censoring marks} on the plot. Defaults to \code{TRUE}.} + +\item{pch}{(\code{numeric})\cr +A single numeric value specifying the \strong{plotting character} (point shape code) for censoring marks.} + +\item{size}{(\code{numeric})\cr +A single numeric value specifying the \strong{size} of the censoring marks.} + +\item{xticks}{(\code{numeric} or \code{NULL})\cr +A numeric vector of explicit \strong{x-axis tick positions}, or a single numeric value representing the \strong{interval} between ticks, or \code{NULL} for automatic \code{ggplot2} scaling.} + +\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} + +\item{yval}{(\code{character})\cr +A single character string, either \code{"Survival"} or \code{"Failure"} to plot the corresponding probability. Case sensitive.} + +\item{ylab}{(\code{character})\cr +A single character string for the \strong{y-axis label}.} + +\item{ylim}{(\code{numeric})\cr +A \strong{numeric vector of length 2} defining the lower and upper limits of the y-axis (e.g., \code{c(0, 1)}).} + +\item{title}{(\code{character})\cr +A single character string for the \strong{plot title}.} + +\item{footnotes}{(\code{character})\cr +A single character string for plot \strong{footnotes or caption}.} + +\item{font_size}{Numeric, base font size for the table.} + +\item{ci_ribbon}{(\code{logical})\cr +A single logical value indicating whether to display \strong{confidence intervals} as a ribbon (shaded area) around the survival curve. Defaults to \code{TRUE}.} + +\item{legend_pos}{(\code{numeric} or \code{NULL})\cr +A \strong{numeric vector of length 2} defining the \strong{legend position} as (x, y) coordinates relative to the plot area (ranging from 0 to 1), or \code{NULL} for external, automatic placement.} + +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{control_annot_surv_med}{A list of control parameters for the annotation box.} + +\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} + +\item{control_annot_coxph}{A list of control parameters for the annotation box.} + +\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} + +\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} +} +\value{ +The function \code{process_survfit_data} returns a data frame containing the survival +curve steps, confidence intervals, and censoring info. + +The function \code{gg_km} returns a \code{ggplot2} object of the KM plot. + +The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. + +The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. + +The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +} +\description{ +The function \code{process_survfit_data} +} +\details{ +Data setup assumes \code{"time"} is event time, \code{"status"} is event indicator (\code{1} represents an event), +while \code{"arm"} is the treatment group. +} +\section{Functions}{ +\itemize{ +\item \code{process_survfit_data()}: takes a fitted \link[survival:survfit]{survival::survfit} object and processes it into a data frame +suitable for plotting a Kaplan-Meier curve with \code{ggplot2}. Time zero is also added to the data. + +\item \code{gg_km()}: creates a Kaplan-Meier survival curve, with +support for various customizations like censoring marks, Confidence Intervals (CIs), and axis control. + +\item \code{annot_surv_med()}: The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. + +\item \code{annot_cox_ph()}: The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. + +\item \code{annot_at_risk()}: The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. + +}} +\examples{ +# Data preparation for KM plot +use_lung <- lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) + +# Fit Kaplan-Meier model +formula <- Surv(time, status) ~ arm +fit_kmg01 <- survfit(formula, use_lung) + +# Process survfit data for plotting +surv_plot_data <- process_survfit_data(fit_kmg01) +head(surv_plot_data) + +# Example of making the KM plot +plt_kmg01 <- gg_km(surv_plot_data, + xlab = "Time (Days)" +) + +# Annotate Kaplan-Meier Plot with Median Survival Table +annot_surv_med(plt_kmg01, fit_kmg01) + +# Annotate Kaplan-Meier Plot with Cox-PH Table +coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") +annot_cox_ph(plt_kmg01, coxph_tbl) + +# Annotate Plot with Numbers at Risk Table +annot_at_risk(plt_kmg01, fit_kmg01) + +} diff --git a/man/gkm.Rd b/man/gkm.Rd deleted file mode 100644 index 3739dd77..00000000 --- a/man/gkm.Rd +++ /dev/null @@ -1,169 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{h_data_plot} -\alias{h_data_plot} -\alias{g_km} -\alias{annot_surv_med} -\alias{annot_cox_ph} -\alias{annot_at_risk} -\title{Generate a Kaplan-Meier Plot} -\usage{ -h_data_plot(fit_km, armval = "All", max_time = NULL) - -g_km( - surv_plot_data, - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL -) - -annot_surv_med( - gg_plt, - fit_km, - control_annot_surv_med = control_surv_med_annot(), - font_size = 10 -) - -annot_cox_ph( - gg_plt, - coxph_tbl, - control_annot_coxph = control_coxph_annot(), - font_size = 10 -) - -annot_at_risk( - gg_plt, - fit_km, - font_size = 10, - annot_at_risk_title = TRUE, - rel_height_plot = 0.75, - xlab = "Days" -) -} -\arguments{ -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} - -\item{armval}{Character string for the strata level if \code{fit_km} has no strata (e.g., "All").} - -\item{max_time}{Numeric, the maximum time point to display on the x-axis.} - -\item{surv_plot_data}{A data frame containing the pre-processed survival data, ready for plotting. -This data should be equivalent to the output of \code{h_data_plot}.} - -\item{col}{A character vector of colors for the survival curves. Length should match number of arms.} - -\item{lty}{A vector of line types for the survival curves, or \code{NULL} for default.} - -\item{lwd}{Numeric value specifying line width for the survival curves.} - -\item{censor_show}{Logical, whether to display censoring marks on the plot.} - -\item{pch}{Plotting character for censoring marks.} - -\item{size}{Size of the censoring marks.} - -\item{xticks}{Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto.} - -\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} - -\item{yval}{Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability.} - -\item{ylab}{Character string for the y-axis label.} - -\item{ylim}{Numeric vector of length 2 for y-axis limits.} - -\item{title}{Character string for the plot title.} - -\item{footnotes}{Character string for plot footnotes/caption.} - -\item{font_size}{Numeric, base font size for the table.} - -\item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} - -\item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} - -\item{ggtheme}{An optional \code{ggplot2} theme to apply.} - -\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} - -\item{control_annot_surv_med}{A list of control parameters for the annotation box.} - -\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} - -\item{control_annot_coxph}{A list of control parameters for the annotation box.} - -\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} - -\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} -} -\value{ -The function \code{h_data_plot} returns a data frame containing the survival curve steps, confidence intervals, and censoring info. - -The function \code{g_km} returns a \code{ggplot2} object of the KM plot. - -The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. - -The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. - -The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. -} -\description{ -The function \code{h_data_plot} takes a fitted \code{survfit} object and processes it into a data frame -suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending -the curve to time zero. - -The function \code{g_km} creates a comprehensive \code{ggplot2} object for a Kaplan-Meier -survival curve, with support for various customizations like censoring marks, CIs, and axis control. - -The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a -Kaplan-Meier plot using \code{cowplot}. - -The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a -Kaplan-Meier plot using \code{cowplot}. - -The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. -} -\examples{ -# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), -# and 'arm' is the treatment group) -library(survival) -use_lung <- lung -use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) -use_lung$status <- use_lung$status - 1 # Convert status to 0/1 -use_lung <- na.omit(use_lung) -formula <- Surv(time, status) ~ arm -fit_kmg01 <- survfit(formula, use_lung) -surv_plot_data <- h_data_plot(fit_kmg01) -head(surv_plot_data) - -# Example of making the KM plot -plt_kmg01 <- g_km(surv_plot_data, - xlab = "Time (Days)" -) - -# Annotate Kaplan-Meier Plot with Median Survival Table -annot_surv_med(plt_kmg01, fit_kmg01) - -# Annotate Kaplan-Meier Plot with Cox-PH Table -coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") -annot_cox_ph(plt_kmg01, coxph_tbl) - -# Annotate Plot with Numbers at Risk Table -annot_at_risk(plt_kmg01, fit_kmg01) - -} diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd index 8d181629..fb2b827f 100644 --- a/man/h_tbl_median_surv.Rd +++ b/man/h_tbl_median_surv.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R +% Please edit documentation in R/gg_km_utils.R \name{h_tbl_median_surv} \alias{h_tbl_median_surv} \title{Median Survival Summary Table} diff --git a/man/h_xticks.Rd b/man/h_xticks.Rd index 2f912508..e61f4a50 100644 --- a/man/h_xticks.Rd +++ b/man/h_xticks.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R +% Please edit documentation in R/gg_km_utils.R \name{h_xticks} \alias{h_xticks} \title{Calculate X-axis Ticks} @@ -7,9 +7,10 @@ h_xticks(data, xticks = NULL, max_time = NULL) } \arguments{ -\item{data}{A data frame containing a \code{time} column.} +\item{data}{A data frame containing a \verb{"time""} column.} -\item{xticks}{A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation.} +\item{xticks}{A numeric vector of specific tick positions, a single number for the interval, or +\code{NULL} for auto-calculation.} \item{max_time}{Optional numeric value specifying the maximum time to consider for tick range.} } diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 94671a23..3c64b849 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -19,7 +19,7 @@ test_that("test gkm() works", { fit_kmg01 <- survival::survfit(model_formula, anl) - expect_no_error(surv_plot_data <- h_data_plot(fit_kmg01)) + expect_no_error(surv_plot_data <- process_survfit_data(fit_kmg01)) expect_no_error( suppressWarnings( @@ -32,7 +32,7 @@ test_that("test gkm() works", { ) expect_no_error( - plt_kmg01 <- g_km(surv_plot_data, + plt_kmg01 <- gg_km(surv_plot_data, xlab = "Time (Days)", ylim = c(0.9, 1) ) %>% From e1006348fa9770f62009ce7d6021dc0d90c53a2d Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 4 Dec 2025 17:24:43 +0100 Subject: [PATCH 32/51] removing more parameters --- DESCRIPTION | 2 -- NAMESPACE | 1 - R/crane-package.R | 1 - R/gg_km.R | 75 +++++++++++++++++++++++++++-------------------- R/gg_km_utils.R | 62 +++++++++++++++++++++++++++------------ man/gg_km.Rd | 36 ++++++++++------------- 6 files changed, 101 insertions(+), 76 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 422c6878..5c0be7c3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,12 +23,10 @@ Depends: gtsummary (>= 2.4.0.9009), R (>= 4.2) Imports: - assertthat (>= 0.2.1), broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), cowplot (>= 1.2.0), - checkmate (>= 2.3.2), cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), diff --git a/NAMESPACE b/NAMESPACE index 48592fa2..55964c5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,7 +45,6 @@ export(theme_gtsummary_roche) import(ggplot2) import(glue) import(rlang) -importFrom(assertthat,assert_that) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) diff --git a/R/crane-package.R b/R/crane-package.R index bf9042c9..bb6a7069 100644 --- a/R/crane-package.R +++ b/R/crane-package.R @@ -2,7 +2,6 @@ #' @import rlang #' @import ggplot2 #' @import glue glue -#' @importFrom assertthat assert_that #' @importFrom broom tidy #' @importFrom cowplot plot_grid #' @importFrom cowplot ggdraw draw_plot diff --git a/R/gg_km.R b/R/gg_km.R index 5700334e..9c43424c 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -1,7 +1,11 @@ -#' Kaplan-Meier Plot with ggplot2 +#' Kaplan-Meier Plot #' #' @description -#' The function `process_survfit_data` +#' This set of functions facilitates the creation of Kaplan-Meier survival plots using `ggplot2`. Use +#' `process_survfit_data()` to prepare the survival data from a fitted `survfit` object, and then +#' `gg_km()` to generate the Kaplan-Meier plot with various customization options. Additional functions +#' like `annot_surv_med()`, `annot_cox_ph()`, and `annot_at_risk()` allow for adding summary tables and +#' annotations to the plot. #' #' @name gg_km NULL @@ -107,9 +111,6 @@ process_survfit_data <- function(fit_km, #' @param surv_plot_data (`data.frame`)\cr #' A data frame containing the pre-processed survival data, ready for plotting. #' This data should be equivalent to the output of `process_survfit_data`. -#' @param col (`character`)\cr -#' A character vector of colors (e.g., color names or hexadecimal codes) for the survival curves. -#' The **length must match the number of arms/groups** being plotted. #' @param lty (`numeric` or `NULL`)\cr #' A numeric vector of **line types** (e.g., `1` for solid, `2` for dashed) for the survival curves, or `NULL` for `ggplot2` defaults. #' The length should match the number of arms/groups. @@ -133,14 +134,8 @@ process_survfit_data <- function(fit_km, #' A single character string for the **y-axis label**. #' @param ylim (`numeric`)\cr #' A **numeric vector of length 2** defining the lower and upper limits of the y-axis (e.g., `c(0, 1)`). -#' @param title (`character`)\cr -#' A single character string for the **plot title**. -#' @param footnotes (`character`)\cr -#' A single character string for plot **footnotes or caption**. #' @param font_size (`numeric`)\cr #' A single numeric value specifying the **base font size** for the plot theme elements. -#' @param ci_ribbon (`logical`)\cr -#' A single logical value indicating whether to display **confidence intervals** as a ribbon (shaded area) around the survival curve. Defaults to `TRUE`. #' @param legend_pos (`numeric` or `NULL`)\cr #' A **numeric vector of length 2** defining the **legend position** as (x, y) coordinates relative to the plot area (ranging from 0 to 1), or `NULL` for external, automatic placement. #' @@ -152,9 +147,16 @@ process_survfit_data <- function(fit_km, #' xlab = "Time (Days)" #' ) #' +#' # Confidence Interval as Ribbon +#' plt_kmg01 + +#' ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) +#' +#' # Adding Title and Footnotes +#' plt_kmg01 + +#' ggplot2::labs(title = "title", caption = "footnotes") +#' #' @export gg_km <- function(surv_plot_data, - col = NULL, lty = NULL, lwd = 0.5, censor_show = TRUE, @@ -166,17 +168,30 @@ gg_km <- function(surv_plot_data, yval = c("Survival", "Failure"), ylab = paste(yval, "Probability"), ylim = NULL, - title = NULL, - footnotes = NULL, font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL) { - checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) + legend_pos = NULL) { + set_cli_abort_call() + + # Input checks + check_data_frame(surv_plot_data) + needed_cols <- c("time", "estimate", "conf.low", "conf.high", "strata", "n.censor", "censor") + if (!all(needed_cols %in% colnames(surv_plot_data))) { + cli::cli_abort( + "The input {.arg surv_plot_data} must contain the following columns: ", + "{.code time}, {.code estimate}, {.code conf.low}, {.code conf.high}, {.code strata}, {.code n.censor}, and {.code censor}.", + call = get_cli_abort_call() + ) + } + if (nrow(surv_plot_data) < 1) { + cli::cli_abort( + "The input {.arg surv_plot_data} must contain at least one row of data.", + call = get_cli_abort_call() + ) + } + data <- surv_plot_data armval <- levels(data$strata) - checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) yval <- match.arg(yval) @@ -208,13 +223,17 @@ gg_km <- function(surv_plot_data, ) + ggplot2::theme_bw(base_size = font_size) + ggplot2::scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + - ggplot2::labs(title = title, x = xlab, y = ylab, caption = footnotes) + + ggplot2::labs(x = xlab, y = ylab) + ggplot2::theme( - axis.text = ggplot2::element_text(size = font_size), axis.title = ggplot2::element_text(size = font_size), - legend.title = ggplot2::element_blank(), legend.text = ggplot2::element_text(size = font_size), + axis.text = ggplot2::element_text(size = font_size), + axis.title = ggplot2::element_text(size = font_size), + legend.title = ggplot2::element_blank(), + legend.text = ggplot2::element_text(size = font_size), legend.box.background = ggplot2::element_rect(fill = "white", linewidth = 0.5), - legend.background = ggplot2::element_blank(), legend.position = "inside", - legend.spacing.y = ggplot2::unit(-0.02, "npc"), panel.grid.major = ggplot2::element_blank(), + legend.background = ggplot2::element_blank(), + legend.position = "inside", + legend.spacing.y = ggplot2::unit(-0.02, "npc"), + panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank() ) @@ -281,14 +300,6 @@ gg_km <- function(surv_plot_data, ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA))) } - if (ci_ribbon) gg_plt <- gg_plt + ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) - - if (!is.null(col)) { - gg_plt <- gg_plt + - ggplot2::scale_color_manual(values = col) + - ggplot2::scale_fill_manual(values = col) - } - gg_plt } diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index 1c64b18c..89e6bc13 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -1,5 +1,4 @@ f_conf_level <- function(conf_level) { - # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere paste0(conf_level * 100, "% CI") } @@ -8,7 +7,15 @@ control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = } control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { - checkmate::assert_logical(ref_lbls, any.missing = FALSE) + set_cli_abort_call() + + check_logical(ref_lbls) + if ((!anyNA(ref_lbls) && length(ref_lbls) >= 1)) { + cli::cli_abort( + "{.arg ref_lbls} must be a single {.cls logical} value (TRUE or FALSE).", + call = get_cli_abort_call() + ) + } res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) res @@ -58,9 +65,9 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, } if (hline) { res <- res + ggplot2::annotate("segment", - x = 0 + 0.2 * colwidths[2], - xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - - 0.5, yend = nrow(df) - 0.5 + x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5 ) } for (i in seq_len(ncol(df))) { @@ -68,15 +75,15 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, 0 } else { sum(colwidths[1:(i - - 1)]) + 1)]) }, sum(colwidths[1:i])) res <- res + ggplot2::annotate("text", - x = mean(line_pos), y = rev(seq_len(nrow(df))), - label = df[, i], size = font_size / .pt, fontface = if (col_labels) { - c(col_lab_fontface, rep("plain", nrow(df) - 1)) - } else { - rep("plain", nrow(df)) - } + x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size / .pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } else { + rep("plain", nrow(df)) + } ) } res @@ -191,10 +198,23 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' #' @export get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - msg <- paste0(rlang::ensym(model_formula), " is not a formula") - assertthat::assert_that(rlang::is_formula(model_formula), msg = msg) - msg <- paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") - assertthat::assert_that(is.factor(data[[arm]]), msg = msg) + set_cli_abort_call() + + # Input checks + if (!rlang::is_formula(model_formula)) { + cli::cli_abort( + "{.arg model_formula} must be a {.cls formula}.", + call = get_cli_abort_call() + ) + } + if (!is.factor(data[[arm]])) { + cli::cli_abort( + "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", + call = get_cli_abort_call() + ) + } + + # Determine reference and comparison groups ref_group <- if (!is.null(ref_group)) { ref_group } else { @@ -205,14 +225,19 @@ get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { ret <- c() for (current_arm in comp_group) { subset_arm <- c(ref_group, current_arm) - assertthat::assert_that(length(subset_arm) == 2, msg = "Make sure 2 arms") + if (length(subset_arm) != 2) { + cli::cli_abort( + "{.arg subset_arm} must contain exactly 2 arms/groups (current length is {length(subset_arm)}).", + call = get_cli_abort_call() + ) + } comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] suppressWarnings( coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() ) orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) + 1) current_row <- data.frame( hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), ci = paste0( @@ -230,4 +255,3 @@ get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { return(ret) } - diff --git a/man/gg_km.Rd b/man/gg_km.Rd index 66399f65..034bb033 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -6,13 +6,12 @@ \alias{annot_surv_med} \alias{annot_cox_ph} \alias{annot_at_risk} -\title{Kaplan-Meier Plot with ggplot2} +\title{Kaplan-Meier Plot} \usage{ process_survfit_data(fit_km, armval = "All", max_time = NULL) gg_km( surv_plot_data, - col = NULL, lty = NULL, lwd = 0.5, censor_show = TRUE, @@ -24,12 +23,8 @@ gg_km( yval = c("Survival", "Failure"), ylab = paste(yval, "Probability"), ylim = NULL, - title = NULL, - footnotes = NULL, font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL + legend_pos = NULL ) annot_surv_med( @@ -69,10 +64,6 @@ A single numeric value defining the \strong{maximum time point} to display on th A data frame containing the pre-processed survival data, ready for plotting. This data should be equivalent to the output of \code{process_survfit_data}.} -\item{col}{(\code{character})\cr -A character vector of colors (e.g., color names or hexadecimal codes) for the survival curves. -The \strong{length must match the number of arms/groups} being plotted.} - \item{lty}{(\code{numeric} or \code{NULL})\cr A numeric vector of \strong{line types} (e.g., \code{1} for solid, \code{2} for dashed) for the survival curves, or \code{NULL} for \code{ggplot2} defaults. The length should match the number of arms/groups.} @@ -103,17 +94,8 @@ A single character string for the \strong{y-axis label}.} \item{ylim}{(\code{numeric})\cr A \strong{numeric vector of length 2} defining the lower and upper limits of the y-axis (e.g., \code{c(0, 1)}).} -\item{title}{(\code{character})\cr -A single character string for the \strong{plot title}.} - -\item{footnotes}{(\code{character})\cr -A single character string for plot \strong{footnotes or caption}.} - \item{font_size}{Numeric, base font size for the table.} -\item{ci_ribbon}{(\code{logical})\cr -A single logical value indicating whether to display \strong{confidence intervals} as a ribbon (shaded area) around the survival curve. Defaults to \code{TRUE}.} - \item{legend_pos}{(\code{numeric} or \code{NULL})\cr A \strong{numeric vector of length 2} defining the \strong{legend position} as (x, y) coordinates relative to the plot area (ranging from 0 to 1), or \code{NULL} for external, automatic placement.} @@ -142,7 +124,11 @@ The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox- The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. } \description{ -The function \code{process_survfit_data} +This set of functions facilitates the creation of Kaplan-Meier survival plots using \code{ggplot2}. Use +\code{process_survfit_data()} to prepare the survival data from a fitted \code{survfit} object, and then +\code{gg_km()} to generate the Kaplan-Meier plot with various customization options. Additional functions +like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annot_at_risk()} allow for adding summary tables and +annotations to the plot. } \details{ Data setup assumes \code{"time"} is event time, \code{"status"} is event indicator (\code{1} represents an event), @@ -185,6 +171,14 @@ plt_kmg01 <- gg_km(surv_plot_data, xlab = "Time (Days)" ) +# Confidence Interval as Ribbon +plt_kmg01 + + ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) + +# Adding Title and Footnotes +plt_kmg01 + + ggplot2::labs(title = "title", caption = "footnotes") + # Annotate Kaplan-Meier Plot with Median Survival Table annot_surv_med(plt_kmg01, fit_kmg01) From 452e26c7acb5e70d5813dffa3c55217d51d9843b Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 4 Dec 2025 17:28:32 +0100 Subject: [PATCH 33/51] change name --- NAMESPACE | 2 +- R/crane-package.R | 5 ++--- R/gg_km.R | 10 +++++----- _pkgdown.yml | 8 ++++---- man/gg_km.Rd | 14 +++++++------- tests/testthat/test-gkm.R | 2 +- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 55964c5c..6c98c780 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,7 @@ export(label_roche_pvalue) export(label_roche_ratio) export(modify_header_rm_md) export(modify_zero_recode) -export(process_survfit_data) +export(process_survfit) export(remove_duplicate_keys) export(sort_hierarchical) export(style_roche_number) diff --git a/R/crane-package.R b/R/crane-package.R index bb6a7069..cea5e9fa 100644 --- a/R/crane-package.R +++ b/R/crane-package.R @@ -3,8 +3,7 @@ #' @import ggplot2 #' @import glue glue #' @importFrom broom tidy -#' @importFrom cowplot plot_grid -#' @importFrom cowplot ggdraw draw_plot +#' @importFrom cowplot plot_grid ggdraw draw_plot #' @importFrom dplyr across starts_with ends_with contains matches num_range #' all_of any_of everything last_col where #' @importFrom labeling extended @@ -18,7 +17,7 @@ ## usethis namespace: end NULL -utils::globalVariables(c(".", "obj")) +utils::globalVariables(c(".")) # using pkgs to silence NOTE .silence <- function() { diff --git a/R/gg_km.R b/R/gg_km.R index 9c43424c..c6081ec6 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -2,7 +2,7 @@ #' #' @description #' This set of functions facilitates the creation of Kaplan-Meier survival plots using `ggplot2`. Use -#' `process_survfit_data()` to prepare the survival data from a fitted `survfit` object, and then +#' `process_survfit()` to prepare the survival data from a fitted `survfit` object, and then #' `gg_km()` to generate the Kaplan-Meier plot with various customization options. Additional functions #' like `annot_surv_med()`, `annot_cox_ph()`, and `annot_at_risk()` allow for adding summary tables and #' annotations to the plot. @@ -21,7 +21,7 @@ NULL #' A single numeric value defining the **maximum time point** to include in the data, #' or `NULL` for no time limit. #' -#' @return The function `process_survfit_data` returns a data frame containing the survival +#' @return The function `process_survfit` returns a data frame containing the survival #' curve steps, confidence intervals, and censoring info. #' #' @details @@ -40,11 +40,11 @@ NULL #' fit_kmg01 <- survfit(formula, use_lung) #' #' # Process survfit data for plotting -#' surv_plot_data <- process_survfit_data(fit_kmg01) +#' surv_plot_data <- process_survfit(fit_kmg01) #' head(surv_plot_data) #' #' @export -process_survfit_data <- function(fit_km, +process_survfit <- function(fit_km, armval = "All", max_time = NULL) { set_cli_abort_call() @@ -110,7 +110,7 @@ process_survfit_data <- function(fit_km, #' #' @param surv_plot_data (`data.frame`)\cr #' A data frame containing the pre-processed survival data, ready for plotting. -#' This data should be equivalent to the output of `process_survfit_data`. +#' This data should be equivalent to the output of `process_survfit`. #' @param lty (`numeric` or `NULL`)\cr #' A numeric vector of **line types** (e.g., `1` for solid, `2` for dashed) for the survival curves, or `NULL` for `ggplot2` defaults. #' The length should match the number of arms/groups. diff --git a/_pkgdown.yml b/_pkgdown.yml index 94a334ea..a2921271 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -41,11 +41,11 @@ reference: - modify_zero_recode - add_blank_rows - label_roche - - title: "g km plot" + - title: "Kaplan-Meier Plot" contents: - - get_cox_pairwise_tbl - - process_survfit_data + - process_survfit - gg_km - annot_surv_med - - annot_cox_ph - annot_at_risk + - get_cox_pairwise_tbl + - annot_cox_ph diff --git a/man/gg_km.Rd b/man/gg_km.Rd index 034bb033..ec9ab75e 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/gg_km.R \name{gg_km} \alias{gg_km} -\alias{process_survfit_data} +\alias{process_survfit} \alias{annot_surv_med} \alias{annot_cox_ph} \alias{annot_at_risk} \title{Kaplan-Meier Plot} \usage{ -process_survfit_data(fit_km, armval = "All", max_time = NULL) +process_survfit(fit_km, armval = "All", max_time = NULL) gg_km( surv_plot_data, @@ -62,7 +62,7 @@ A single numeric value defining the \strong{maximum time point} to display on th \item{surv_plot_data}{(\code{data.frame})\cr A data frame containing the pre-processed survival data, ready for plotting. -This data should be equivalent to the output of \code{process_survfit_data}.} +This data should be equivalent to the output of \code{process_survfit}.} \item{lty}{(\code{numeric} or \code{NULL})\cr A numeric vector of \strong{line types} (e.g., \code{1} for solid, \code{2} for dashed) for the survival curves, or \code{NULL} for \code{ggplot2} defaults. @@ -112,7 +112,7 @@ A \strong{numeric vector of length 2} defining the \strong{legend position} as ( \item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} } \value{ -The function \code{process_survfit_data} returns a data frame containing the survival +The function \code{process_survfit} returns a data frame containing the survival curve steps, confidence intervals, and censoring info. The function \code{gg_km} returns a \code{ggplot2} object of the KM plot. @@ -125,7 +125,7 @@ The function \code{annot_at_risk} returns a \code{cowplot} object combining the } \description{ This set of functions facilitates the creation of Kaplan-Meier survival plots using \code{ggplot2}. Use -\code{process_survfit_data()} to prepare the survival data from a fitted \code{survfit} object, and then +\code{process_survfit()} to prepare the survival data from a fitted \code{survfit} object, and then \code{gg_km()} to generate the Kaplan-Meier plot with various customization options. Additional functions like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annot_at_risk()} allow for adding summary tables and annotations to the plot. @@ -136,7 +136,7 @@ while \code{"arm"} is the treatment group. } \section{Functions}{ \itemize{ -\item \code{process_survfit_data()}: takes a fitted \link[survival:survfit]{survival::survfit} object and processes it into a data frame +\item \code{process_survfit()}: takes a fitted \link[survival:survfit]{survival::survfit} object and processes it into a data frame suitable for plotting a Kaplan-Meier curve with \code{ggplot2}. Time zero is also added to the data. \item \code{gg_km()}: creates a Kaplan-Meier survival curve, with @@ -163,7 +163,7 @@ formula <- Surv(time, status) ~ arm fit_kmg01 <- survfit(formula, use_lung) # Process survfit data for plotting -surv_plot_data <- process_survfit_data(fit_kmg01) +surv_plot_data <- process_survfit(fit_kmg01) head(surv_plot_data) # Example of making the KM plot diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 3c64b849..fd05c0ea 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -19,7 +19,7 @@ test_that("test gkm() works", { fit_kmg01 <- survival::survfit(model_formula, anl) - expect_no_error(surv_plot_data <- process_survfit_data(fit_kmg01)) + expect_no_error(surv_plot_data <- process_survfit(fit_kmg01)) expect_no_error( suppressWarnings( From f7bdf13672b7c1b66951cb2ac73c73aed6f42985 Mon Sep 17 00:00:00 2001 From: melkiades Date: Mon, 8 Dec 2025 17:20:34 +0100 Subject: [PATCH 34/51] finilizing --- NAMESPACE | 8 +- R/annotate_gg_km.R | 289 ++++++++++++++++++++++++++++++++++++ R/get_cox_pairwise_df.R | 119 +++++++++++++++ R/gg_km.R | 220 ++++----------------------- R/gg_km_utils.R | 149 ++----------------- R/tbl_shift.R | 2 +- _pkgdown.yml | 6 +- inst/WORDLIST | 5 +- man/annotate_gg_km.Rd | 110 ++++++++++++++ man/get_cox_pairwise_df.Rd | 76 ++++++++++ man/get_cox_pairwise_tbl.Rd | 51 ------- man/gg_km.Rd | 101 +++---------- man/h_tbl_median_surv.Rd | 6 +- tests/testthat/test-gg_km.R | 38 +++++ tests/testthat/test-gkm.R | 43 ------ 15 files changed, 709 insertions(+), 514 deletions(-) create mode 100644 R/annotate_gg_km.R create mode 100644 R/get_cox_pairwise_df.R create mode 100644 man/annotate_gg_km.Rd create mode 100644 man/get_cox_pairwise_df.Rd delete mode 100644 man/get_cox_pairwise_tbl.Rd create mode 100644 tests/testthat/test-gg_km.R delete mode 100644 tests/testthat/test-gkm.R diff --git a/NAMESPACE b/NAMESPACE index 6c98c780..fbe98ee9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,11 +12,11 @@ export(add_blank_rows) export(add_difference_row) export(add_hierarchical_count_row) export(add_overall) -export(annot_at_risk) -export(annot_cox_ph) -export(annot_surv_med) +export(annotate_coxph) +export(annotate_risk) +export(annotate_surv_med) export(filter_hierarchical) -export(get_cox_pairwise_tbl) +export(get_cox_pairwise_df) export(gg_km) export(label_roche_number) export(label_roche_percent) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R new file mode 100644 index 00000000..c4b6051c --- /dev/null +++ b/R/annotate_gg_km.R @@ -0,0 +1,289 @@ +#' Annotate Kaplan-Meier Plot +#' +#' @description +#' These functions provide capabilities to annotate Kaplan-Meier plots ([gg_km()]) with additional summary tables, +#' including median survival times, numbers at risk, and cox proportional hazards results. +#' The annotations are added using the `cowplot` package for flexible placement. +#' +#' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for related functionalities. +#' +#' @examples +#' # Preparing the Kaplan-Meier Plot +#' use_lung <- lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) +#' +#' fit_kmg01 <- survfit(Surv(time, status) ~ arm, use_lung) +#' surv_plot_data <- process_survfit(fit_kmg01) +#' +#' plt_kmg01 <- gg_km(surv_plot_data) +#' +#' @name annotate_gg_km +NULL + +#' @describeIn annotate_gg_km The `annotate_surv_med` function adds a median survival time summary table as an +#' annotation box. +#' +#' @param gg_plt (`ggplot2` or `cowplot`)\cr +#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot to which the median +#' survival table annotation will be added. +#' @param fit_km (`survfit`)\cr +#' A fitted Kaplan-Meier object of class `survfit` (from the `survival` package). This object contains the necessary +#' survival data used to calculate and generate the content displayed in the annotation table. +#' @param ... Additional arguments passed to the control list for the annotation box. +#' These arguments override the default values. +#' Accepted arguments include: +#' \itemize{ +#' \item \code{x} (\code{numeric}): X-coordinate for the box anchor position (0 to 1). Default is +#' \code{0.8} (\code{0.29} for `annotate_coxph`). +#' \item \code{y} (\code{numeric}): Y-coordinate for the box anchor position (0 to 1). Default is +#' \code{0.85} (\code{0.51} for `annotate_coxph`). +#' \item \code{w} (\code{numeric}): Width of the annotation box (0 to 1). Default is +#' \code{0.32} (\code{0.4} for `annotate_coxph`). +#' \item \code{h} (\code{numeric}): Height of the annotation box (0 to 1). Default +#' is \code{0.16} (\code{0.125} for `annotate_coxph`). +#' \item \code{fill} (\code{logical}): Whether the annotation box should have a background fill. Default is +#' \code{TRUE}. +#' \item \code{font_size} (\code{numeric}): Base font size for the text inside the annotation box. Default +#' is \code{10}. +#' } +#' +#' @return The function `annotate_surv_med` returns a `cowplot` object with the median survival table annotation +#' added, ready for final display or saving. +#' +#' @examples +#' # Annotate Kaplan-Meier Plot with Median Survival Table +#' annotate_surv_med(plt_kmg01, fit_kmg01) +#' +#' @export +annotate_surv_med <- function(gg_plt, fit_km, ...) { + set_cli_abort_call() + default_eargs <- list( + x = 0.8, + y = 0.85, + w = 0.32, + h = 0.16, + font_size = 10, + fill = TRUE + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + + # Checks + check_class(fit_km, "survfit") + check_class(gg_plt, c("gg", "ggplot", "cowplot")) + + # Check position/size (x, y, w, h, font_size) - Must be single non-missing numeric + for (arg_name in c("x", "y", "w", "h", "font_size")) { + check_numeric(eargs[[arg_name]]) + } + check_logical(eargs[["fill"]]) + + # Determine strata_levels for h_tbl_median_surv, assuming it's available in the calling environment or logic should + # be updated. For now, keeping as is, but this typically requires strata_levels or inferring it from fit_km + strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for strata_levels + + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, strata_levels = strata_levels) + bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = eargs[["font_size"]], colwidths = c(1, 1, 2), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = eargs[["font_size"]], face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, eargs[["x"]], eargs[["y"]], + width = eargs[["w"]], height = eargs[["h"]], + vjust = 0.5, hjust = 0.5 + ) + gg_plt +} + +#' @describeIn annotate_gg_km The function `annotate_risk` adds a "Numbers at Risk" table below a +#' Kaplan-Meier plot ([gg_km()]) using `cowplot::plot_grid`. +#' +#' @param gg_plt (`ggplot2` or `cowplot`)\cr +#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot. +#' @param fit_km (`survfit`)\cr +#' A fitted Kaplan-Meier object of class `survfit` (from the `survival` package). This object contains +#' the necessary survival data used to calculate the numbers at risk. +#' @param title (`string`)\cr +#' A single logical value indicating whether to include a above the table. Defaults to +#' `""Patients at Risk:""`. If `NULL`, no title is added. +#' @param rel_height_plot (`numeric`)\cr +#' A single numeric value defining the **relative height** of the main Kaplan-Meier plot area compared +#' to the 'at-risk' table. This value should be between 0 and 1, where a value closer to 1 gives the main plot +#' more vertical space. Defaults to `0.75`. +#' @param xlab (`character`)\cr +#' A single character string for the **x-axis label** on the 'at-risk' table. This typically represents +#' time (e.g., "Time (Days)"). +#' @return The function `annotate_risk` returns a `cowplot` object combining the KM plot and the 'Numbers at Risk' +#' table. +#' @examples +#' # Annotate Plot with Numbers at Risk Table +#' annotate_risk(plt_kmg01, fit_kmg01) +#' +#' @export +annotate_risk <- function(gg_plt, fit_km, title = "Patients at Risk:", + rel_height_plot = 0.75, xlab = "Days", + ...) { + check_class(gg_plt, c("gg", "ggplot", "cowplot")) + check_class(fit_km, "survfit") + check_string(title, allow_empty = TRUE) + check_scalar(rel_height_plot) + check_numeric(rel_height_plot) + if (rel_height_plot <= 0 || rel_height_plot >= 1) { + cli::cli_abort( + "{.arg rel_height_plot} must be a single {.cls numeric} value between 0 and 1 (exclusive).", + call = get_cli_abort_call() + ) + } + check_string(xlab) + default_eargs <- list( + font_size = 10 + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + font_size <- eargs[["font_size"]] + check_numeric(font_size) + + data <- broom::tidy(fit_km) + xticks <- h_xticks(data = data) + annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) + + # Placeholder for strata_levels, should be retrieved from fit_km or passed as argument + strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) + + annot_tbl <- if (is.null(fit_km$strata)) { + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = strata_levels + ) + } else { + strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") + levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = annot_tbl$strata + ) + } + + at_risk_tbl <- as.data.frame( + tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1] + ) + at_risk_tbl[is.na(at_risk_tbl)] <- 0 + rownames(at_risk_tbl) <- levels(annot_tbl$strata) + + gg_at_risk <- df2gg( + at_risk_tbl, + font_size = eargs$font_size, col_labels = FALSE, hline = FALSE, + colwidths = rep(1, ncol(at_risk_tbl)) + ) + + ggplot2::labs(title = if (!is.null(title)) title else NULL, x = xlab) + + ggplot2::theme_bw(base_size = eargs$font_size) + + ggplot2::theme( + plot.title = ggplot2::element_text(size = eargs$font_size, vjust = 3, face = "bold"), + panel.border = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), + axis.title.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + axis.text.y = ggplot2::element_text(size = eargs$font_size, face = "italic", hjust = 1), + axis.text.x = ggplot2::element_text(size = eargs$font_size), + axis.line.x = ggplot2::element_line() + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) + gg_at_risk <- suppressMessages( + gg_at_risk + + ggplot2::scale_x_continuous(expand = c(0.1, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + + ggplot2::scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) + ) + + gg_plt <- cowplot::plot_grid( + gg_plt, gg_at_risk, + align = "v", axis = "tblr", ncol = 1, + rel_heights = c(rel_height_plot, 1 - rel_height_plot) + ) + gg_plt +} + + +#' @describeIn annotate_gg_km The function `annotate_coxph()` adds a Cox Proportional Hazards summary table created by +#' the function [get_cox_pairwise_df()] as an annotation box. +#' +#' @param gg_plt (`ggplot2` or `cowplot`)\cr +#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot to which +#' the Cox-PH annotation table will be added. +#' @param coxph_tbl (`data.frame`)\cr +#' A data frame containing the pre-calculated Cox-PH results, typically from a function like `get_cox_pairwise_df`. +#' This data is used to generate the annotation table content. +#' @param ... Additional arguments passed to the control list for the annotation box. +#' These arguments override the default values. +#' Accepted arguments include: +#' \itemize{ +#' \item \code{x} (`numeric`): X-coordinate for the box anchor position (0 to 1). Default is \code{0.29}. +#' \item \code{y} (`numeric`): Y-coordinate for the box anchor position (0 to 1). Default is \code{0.51}. +#' \item \code{w} (`numeric`): Width of the annotation box (0 to 1). Default is \code{0.4}. +#' \item \code{h} (`numeric`): Height of the annotation box (0 to 1). Default is \code{0.125}. +#' } +#' +#' @return The function `annotate_coxph` returns a `cowplot` object with the Cox-PH table annotation added. +#' +#' @examples +#' # Annotate Kaplan-Meier Plot with Cox-PH Table +#' coxph_tbl <- get_cox_pairwise_df(formula, data = use_lung, arm = "arm", ref_group = "A") +#' annotate_coxph(plt_kmg01, coxph_tbl) +#' +#' @export +annotate_coxph <- function(gg_plt, coxph_tbl, ...) { + set_cli_abort_call() + default_eargs <- list( + x = 0.29, + y = 0.51, + w = 0.4, + h = 0.125, + fill = TRUE, + font_size = 10 + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + + # Check position/size (x, y, w, h, font_size) - Must be single non-missing numeric + for (arg_name in c("x", "y", "w", "h", "font_size")) { + check_numeric(eargs[[arg_name]]) + } + check_logical(eargs[["fill"]]) + + bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] + + gg_coxph <- df2gg(coxph_tbl, font_size = eargs$font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = eargs$font_size, face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) + gg_coxph <- suppressMessages( + gg_coxph + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_coxph, eargs[["x"]], eargs[["y"]], + width = eargs[["w"]], height = eargs[["h"]], + vjust = 0.5, hjust = 0.5 + ) + + gg_plt +} diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R new file mode 100644 index 00000000..cdaa4ceb --- /dev/null +++ b/R/get_cox_pairwise_df.R @@ -0,0 +1,119 @@ +#' Generate Table of Pairwise Cox-PH and Log-Rank Results +#' +#' @description +#' This function performs pairwise comparisons of treatment arms using the **Cox Proportional Hazards model** and +#' calculates the corresponding **log-rank p-value**. Each comparison tests a non-reference group against a specified +#' reference group. +#' +#' @param model_formula (`formula`)\cr +#' A `formula` object specifying the survival model, typically in the form `Surv(time, status) ~ arm + covariates`. +#' @param data (`data.frame`)\cr +#' A `data.frame` containing the survival data, including time, status, and the arm variable. +#' @param arm (`character`)\cr +#' A single character string specifying the name of the column in `data` that contains the grouping/treatment +#' **arm variable**. This column **must be a factor** for correct stratification and comparison. +#' @param ref_group (`character` or `NULL`)\cr +#' A single character string specifying the level of the `arm` variable to be used as the **reference group** for +#' all pairwise comparisons. If `NULL` (the default), the **first unique level** of the `arm` column is automatically +#' selected as the reference group. +#' +#' @return A `data.frame` with the results of the pairwise comparisons. The columns include: +#' \itemize{ +#' \item `arm`: (rownames of the `data.frame`) The comparison arm (group) being tested against the reference group. +#' \item `hr`: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +#' \item `ci`: The 95% confidence interval for the HR, presented as a string in the format "(lower, upper)", with +#' values formatted to two decimal places. +#' \item `pval`: The log-rank p-value for the comparison. +#' } +#' +#' @details The function iterates through each unique arm (excluding the reference group). For each iteration, it +#' filters the data to include only the current comparison arm and the reference arm, and then: +#' \itemize{ +#' \item Fits a Cox model using `survival::coxph`. +#' \item Performs a log-rank test using `survival::survdiff`. +#' } +#' The Hazard Ratio and its 95% confidence interval are extracted from the Cox model summary, and the p-value is +#' extracted from the log-rank test. +#' +#' @seealso `annotate_gg_km()`, `gg_km()`, and the `survival` package functions `survival::coxph` and +#' `survival::survdiff`. +#' +#' @examples +#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +#' # and 'arm' is the treatment group) +#' library(dplyr) # For better data handling +#' +#' # Prepare data in a modern dplyr-friendly way +#' surv_data <- lung |> +#' mutate( +#' arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), +#' status = status - 1 # Convert status to 0/1 +#' ) |> +#' filter(if_all(everything(), ~ !is.na(.))) +#' +#' formula <- Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_df( +#' model_formula = formula, +#' data = surv_data, +#' arm = "arm", +#' ref_group = "A" +#' ) +#' print(results_tbl) +#' +#' @export +get_cox_pairwise_df <- function(model_formula, data, arm, ref_group = NULL) { + set_cli_abort_call() + # Input checks + if (!rlang::is_formula(model_formula)) { + cli::cli_abort( + "{.arg model_formula} must be a {.cls formula}.", + call = get_cli_abort_call() + ) + } + if (!is.factor(data[[arm]])) { + cli::cli_abort( + "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", + call = get_cli_abort_call() + ) + } + + # Determine reference and comparison groups + ref_group <- if (!is.null(ref_group)) { + ref_group + } else { + levels(data[[arm]])[1] + } + comp_group <- setdiff(levels(data[[arm]]), ref_group) + + ret <- c() + for (current_arm in comp_group) { + subset_arm <- c(ref_group, current_arm) + if (length(subset_arm) != 2) { + cli::cli_abort( + "{.arg subset_arm} must contain exactly 2 arms/groups (current length is {length(subset_arm)}).", + call = get_cli_abort_call() + ) + } + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] + suppressWarnings( + coxph_ans <- coxph(formula = model_formula, data = comp_df) |> summary() + ) + orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - 1) + current_row <- data.frame( + hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), + ci = paste0( + "(", + sprintf("%.2f", coxph_ans$conf.int[1, 3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1, 4]), + ")" + ), + pval = log_rank_pvalue + ) + rownames(current_row) <- current_arm + ret <- rbind(ret, current_row) + } + + ret +} diff --git a/R/gg_km.R b/R/gg_km.R index c6081ec6..4717c038 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -4,7 +4,7 @@ #' This set of functions facilitates the creation of Kaplan-Meier survival plots using `ggplot2`. Use #' `process_survfit()` to prepare the survival data from a fitted `survfit` object, and then #' `gg_km()` to generate the Kaplan-Meier plot with various customization options. Additional functions -#' like `annot_surv_med()`, `annot_cox_ph()`, and `annot_at_risk()` allow for adding summary tables and +#' like `annot_surv_med()`, `annot_cox_ph()`, and `annotate_risk()` allow for adding summary tables and #' annotations to the plot. #' #' @name gg_km @@ -14,7 +14,7 @@ NULL #' suitable for plotting a Kaplan-Meier curve with `ggplot2`. Time zero is also added to the data. #' #' @param fit_km A fitted Kaplan-Meier object of class `survfit`. -#' @param armval (`string`)\cr +#' @param strata_levels (`string`)\cr #' A single character string used as the strata level if the input `fit_km` object #' has no strata (e.g., `"All"`). #' @param max_time (`numeric` or `NULL`)\cr @@ -45,8 +45,8 @@ NULL #' #' @export process_survfit <- function(fit_km, - armval = "All", - max_time = NULL) { + strata_levels = "All", + max_time = NULL) { set_cli_abort_call() # Input checks @@ -56,7 +56,7 @@ process_survfit <- function(fit_km, call = get_cli_abort_call() ) } - check_string(armval) + check_string(strata_levels) check_numeric(max_time, allow_empty = TRUE) y <- broom::tidy(fit_km) @@ -71,7 +71,7 @@ process_survfit <- function(fit_km, levels = strata_levels ) } else { - y$strata <- armval + y$strata <- strata_levels } # Extend to time zero @@ -112,8 +112,8 @@ process_survfit <- function(fit_km, #' A data frame containing the pre-processed survival data, ready for plotting. #' This data should be equivalent to the output of `process_survfit`. #' @param lty (`numeric` or `NULL`)\cr -#' A numeric vector of **line types** (e.g., `1` for solid, `2` for dashed) for the survival curves, or `NULL` for `ggplot2` defaults. -#' The length should match the number of arms/groups. +#' A numeric vector of **line types** (e.g., `1` for solid, `2` for dashed) for the survival curves, or `NULL` for +#' `ggplot2` defaults. The length should match the number of arms/groups. #' @param lwd (`numeric`)\cr #' A single numeric value specifying the **line width** for the survival curves. #' @param censor_show (`logical`)\cr @@ -123,29 +123,25 @@ process_survfit <- function(fit_km, #' @param size (`numeric`)\cr #' A single numeric value specifying the **size** of the censoring marks. #' @param max_time (`numeric`)\cr -#' A single numeric value defining the **maximum time point** to display on the x-axis. Data points beyond this time will be clipped. +#' A single numeric value defining the **maximum time point** to display on the x-axis. #' @param xticks (`numeric` or `NULL`)\cr -#' A numeric vector of explicit **x-axis tick positions**, or a single numeric value representing the **interval** between ticks, or `NULL` for automatic `ggplot2` scaling. -#' @param xlab (`character`)\cr -#' A single character string for the **x-axis label**. +#' A numeric vector of explicit **x-axis tick positions**, or a single numeric value representing the **interval** +#' between ticks, or `NULL` for automatic `ggplot2` scaling. #' @param yval (`character`)\cr -#' A single character string, either `"Survival"` or `"Failure"` to plot the corresponding probability. Case sensitive. -#' @param ylab (`character`)\cr -#' A single character string for the **y-axis label**. +#' A single character string, either `"Survival"` or `"Failure"` to plot the corresponding probability. #' @param ylim (`numeric`)\cr #' A **numeric vector of length 2** defining the lower and upper limits of the y-axis (e.g., `c(0, 1)`). #' @param font_size (`numeric`)\cr #' A single numeric value specifying the **base font size** for the plot theme elements. #' @param legend_pos (`numeric` or `NULL`)\cr -#' A **numeric vector of length 2** defining the **legend position** as (x, y) coordinates relative to the plot area (ranging from 0 to 1), or `NULL` for external, automatic placement. +#' A **numeric vector of length 2** defining the **legend position** as (x, y) coordinates relative to the plot +#' area (ranging from 0 to 1), or `NULL` for automatic placement. #' #' @return The function `gg_km` returns a `ggplot2` object of the KM plot. #' #' @examples #' # Example of making the KM plot -#' plt_kmg01 <- gg_km(surv_plot_data, -#' xlab = "Time (Days)" -#' ) +#' plt_kmg01 <- gg_km(surv_plot_data) #' #' # Confidence Interval as Ribbon #' plt_kmg01 + @@ -155,6 +151,11 @@ process_survfit <- function(fit_km, #' plt_kmg01 + #' ggplot2::labs(title = "title", caption = "footnotes") #' +#' # Changing xlab and ylab +#' plt_kmg01 + +#' xlab("Another Day") + +#' ylab("THE Survival Probability") +#' #' @export gg_km <- function(surv_plot_data, lty = NULL, @@ -164,9 +165,7 @@ gg_km <- function(surv_plot_data, size = 2, max_time = NULL, xticks = NULL, - xlab = "Days", yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), ylim = NULL, font_size = 10, legend_pos = NULL) { @@ -178,7 +177,8 @@ gg_km <- function(surv_plot_data, if (!all(needed_cols %in% colnames(surv_plot_data))) { cli::cli_abort( "The input {.arg surv_plot_data} must contain the following columns: ", - "{.code time}, {.code estimate}, {.code conf.low}, {.code conf.high}, {.code strata}, {.code n.censor}, and {.code censor}.", + "{.code time}, {.code estimate}, {.code conf.low}, {.code conf.high}, ", + "{.code strata}, {.code n.censor}, and {.code censor}.", call = get_cli_abort_call() ) } @@ -188,15 +188,12 @@ gg_km <- function(surv_plot_data, call = get_cli_abort_call() ) } + check_numeric(ylim, allow_empty = TRUE) data <- surv_plot_data - - armval <- levels(data$strata) + strata_levels <- levels(data$strata) yval <- match.arg(yval) - - xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) - if (yval == "Failure") { data[c("estimate", "conf.low", "conf.high", "censor")] <- list( 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor @@ -214,6 +211,8 @@ gg_km <- function(surv_plot_data, ylim <- c(y_lwr, y_upr) } + xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) + gg_plt <- ggplot2::ggplot( data = data, mapping = ggplot2::aes( @@ -223,7 +222,7 @@ gg_km <- function(surv_plot_data, ) + ggplot2::theme_bw(base_size = font_size) + ggplot2::scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + - ggplot2::labs(x = xlab, y = ylab) + + ggplot2::labs(x = "Days", y = paste(yval, "Probability")) + ggplot2::theme( axis.text = ggplot2::element_text(size = font_size), axis.title = ggplot2::element_text(size = font_size), @@ -258,8 +257,8 @@ gg_km <- function(surv_plot_data, } else { max_time2 <- sort( data$time, - partial = nrow(data) - length(armval) - 1 - )[nrow(data) - length(armval) - 1] + partial = nrow(data) - length(strata_levels) - 1 + )[nrow(data) - length(strata_levels) - 1] y_rng <- ylim[2] - ylim[1] @@ -302,164 +301,3 @@ gg_km <- function(surv_plot_data, gg_plt } - -#' @describeIn gg_km The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a -#' Kaplan-Meier plot using \code{cowplot}. -#' -#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. -#' @param control_annot_surv_med A list of control parameters for the annotation box. -#' @param font_size Numeric, base font size for the annotation table. -#' -#' @return The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. -#' -#' @examples -#' # Annotate Kaplan-Meier Plot with Median Survival Table -#' annot_surv_med(plt_kmg01, fit_kmg01) -#' -#' @export -annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv_med_annot(), font_size = 10) { - # Determine armval for h_tbl_median_surv, assuming it's available in the calling environment or logic should be updated - # For now, keeping as is, but this typically requires armval or inferring it from fit_km - armval <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for armval - - surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) - bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] - - gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + - ggplot2::theme( - axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), - plot.margin = ggplot2::margin(0, 2, 0, 5) - ) + - ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) - gg_surv_med <- suppressMessages( - gg_surv_med + - ggplot2::scale_x_continuous(expand = c(0.025, 0)) + - ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) - ) - - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], - width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], - vjust = 0.5, hjust = 0.5 - ) - gg_plt -} - -#' @describeIn gg_km The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a -#' Kaplan-Meier plot using \code{cowplot}. -#' -#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. -#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results. -#' @param control_annot_coxph A list of control parameters for the annotation box. -#' @param font_size Numeric, base font size for the annotation table. -#' -#' @return The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. -#' -#' @examples -#' # Annotate Kaplan-Meier Plot with Cox-PH Table -#' coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") -#' annot_cox_ph(plt_kmg01, coxph_tbl) -#' -#' @export -annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { - # ... (function body remains the same) - bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] - - gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + - ggplot2::theme( - axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), - plot.margin = ggplot2::margin(0, 2, 0, 5) - ) + - ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) - gg_coxph <- suppressMessages( - gg_coxph + - ggplot2::scale_x_continuous(expand = c(0.025, 0)) + - ggplot2::scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) - ) - - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], - width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], - vjust = 0.5, hjust = 0.5 - ) - gg_plt -} - - -#' @describeIn gg_km The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. -#' -#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. -#' @param font_size Numeric, base font size for the table. -#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:". -#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). -#' @param xlab Character string for the x-axis label on the 'at-risk' table (typically time). -#' @return The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. -#' -#' @examples -#' # Annotate Plot with Numbers at Risk Table -#' annot_at_risk(plt_kmg01, fit_kmg01) -#' -#' @export -annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = TRUE, rel_height_plot = 0.75, xlab = "Days") { - data <- broom::tidy(fit_km) - xticks <- h_xticks(data = data) - annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) - - # Placeholder for armval, should be retrieved from fit_km or passed as argument - armval <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) - - annot_tbl <- if (is.null(fit_km$strata)) { - data.frame( - n.risk = annot_tbl$n.risk, - time = annot_tbl$time, - strata = armval - ) - } else { - strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") - levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] - data.frame( - n.risk = annot_tbl$n.risk, - time = annot_tbl$time, - strata = annot_tbl$strata - ) - } - - at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) - at_risk_tbl[is.na(at_risk_tbl)] <- 0 - rownames(at_risk_tbl) <- levels(annot_tbl$strata) - - gg_at_risk <- df2gg( - at_risk_tbl, - font_size = font_size, col_labels = FALSE, hline = FALSE, - colwidths = rep(1, ncol(at_risk_tbl)) - ) + - ggplot2::labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + - ggplot2::theme_bw(base_size = font_size) + - ggplot2::theme( - plot.title = ggplot2::element_text(size = font_size, vjust = 3, face = "bold"), - panel.border = ggplot2::element_blank(), - panel.grid = ggplot2::element_blank(), - axis.title.y = ggplot2::element_blank(), - axis.ticks.y = ggplot2::element_blank(), - axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), - axis.text.x = ggplot2::element_text(size = font_size), - axis.line.x = ggplot2::element_line() - ) + - ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) - gg_at_risk <- suppressMessages( - gg_at_risk + - ggplot2::scale_x_continuous(expand = c(0.1, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + - ggplot2::scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) - ) - - gg_plt <- cowplot::plot_grid( - gg_plt, gg_at_risk, - align = "v", axis = "tblr", ncol = 1, - rel_heights = c(rel_height_plot, 1 - rel_height_plot) - ) - gg_plt -} diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index 89e6bc13..99a90995 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -1,26 +1,3 @@ -f_conf_level <- function(conf_level) { - paste0(conf_level * 100, "% CI") -} - -control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - list(x = x, y = y, w = w, h = h, fill = fill) -} - -control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { - set_cli_abort_call() - - check_logical(ref_lbls) - if ((!anyNA(ref_lbls) && length(ref_lbls) >= 1)) { - cli::cli_abort( - "{.arg ref_lbls} must be a single {.cls logical} value (TRUE or FALSE).", - call = get_cli_abort_call() - ) - } - - res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) - res -} - #' Convert Data Frame to ggplot2 Table Graphic #' #' @description Creates a `ggplot2` object that renders a data frame as a table graphic. @@ -71,12 +48,14 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, ) } for (i in seq_len(ncol(df))) { - line_pos <- c(if (i == 1) { - 0 - } else { - sum(colwidths[1:(i - - 1)]) - }, sum(colwidths[1:i])) + line_pos <- c( + if (i == 1) { + 0 + } else { + sum(colwidths[1:(i - 1)]) + }, + sum(colwidths[1:i]) + ) res <- res + ggplot2::annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), label = df[, i], size = font_size / .pt, fontface = if (col_labels) { @@ -102,7 +81,6 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, #' #' @keywords internal h_xticks <- function(data, xticks = NULL, max_time = NULL) { - # ... (function body remains the same) if (is.null(xticks)) { if (is.null(max_time)) { labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) @@ -133,14 +111,14 @@ h_xticks <- function(data, xticks = NULL, max_time = NULL) { #' @description Extracts and formats the median survival time and its confidence interval #' from a fitted Kaplan-Meier object. #' -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. -#' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). -#' @keywords internal +#' @inheritParams gg_km +#' #' @return A data frame with columns "N", "Median", and the confidence interval label. -h_tbl_median_surv <- function(fit_km, armval = "All") { - # ... (function body remains the same) +#' +#' @keywords internal +h_tbl_median_surv <- function(fit_km, strata_levels = "All") { y <- if (is.null(fit_km$strata)) { - as.data.frame(t(summary(fit_km)$table), row.names = armval) + as.data.frame(t(summary(fit_km)$table), row.names = strata_levels) } else { tbl <- summary(fit_km)$table rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") @@ -155,103 +133,6 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { ) stats::setNames( y[c("records", "median", "CI")], - c("N", "Median", f_conf_level(conf.int)) + c("N", "Median", paste0(conf.int * 100, "% CI")) ) } - -#' Perform Pairwise Cox Proportional Hazards Regression -#' -#' This function performs a pairwise comparison of treatment arms using the **Cox proportional hazards model** and calculates the corresponding **log-rank p-value**. Each comparison is made between a specified reference group and all other comparison groups in the dataset. -#' -#' @param model_formula A [stats::formula] object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}. -#' @param data A `data.frame` containing the survival data, including time, status, and the arm variable. -#' @param arm A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable). -#' @param ref_group A character string specifying the level of the \code{arm} variable to be used as the **reference group** for all pairwise comparisons. If \code{NULL} (the default), the **first unique level** of the \code{arm} column is used as the reference group. -#' -#' @return A `data.frame` with the results of the pairwise comparisons. The columns include: -#' \itemize{ -#' \item \code{arm}: The comparison arm being tested against the reference group. -#' \item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. -#' \item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. -#' \item \code{pval}: The log-rank p-value for the comparison. -#' } -#' -#' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. -#' -#' @examples -#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), -#' # and 'arm' is the treatment group) -#' library(survival) -#' use_lung <- lung -#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) -#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 -#' use_lung <- na.omit(use_lung) -#' -#' formula <- Surv(time, status) ~ arm -#' results_tbl <- get_cox_pairwise_tbl( -#' model_formula = formula, -#' data = use_lung, -#' arm = "arm", -#' ref_group = "A" -#' ) -#' print(results_tbl) -#' -#' @export -get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - set_cli_abort_call() - - # Input checks - if (!rlang::is_formula(model_formula)) { - cli::cli_abort( - "{.arg model_formula} must be a {.cls formula}.", - call = get_cli_abort_call() - ) - } - if (!is.factor(data[[arm]])) { - cli::cli_abort( - "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", - call = get_cli_abort_call() - ) - } - - # Determine reference and comparison groups - ref_group <- if (!is.null(ref_group)) { - ref_group - } else { - levels(data[[arm]])[1] - } - comp_group <- setdiff(levels(data[[arm]]), ref_group) - - ret <- c() - for (current_arm in comp_group) { - subset_arm <- c(ref_group, current_arm) - if (length(subset_arm) != 2) { - cli::cli_abort( - "{.arg subset_arm} must contain exactly 2 arms/groups (current length is {length(subset_arm)}).", - call = get_cli_abort_call() - ) - } - comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] - suppressWarnings( - coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() - ) - orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) - log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) - current_row <- data.frame( - hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), - ci = paste0( - "(", - sprintf("%.2f", coxph_ans$conf.int[1, 3]), - ", ", - sprintf("%.2f", coxph_ans$conf.int[1, 4]), - ")" - ), - pval = log_rank_pvalue - ) - rownames(current_row) <- current_arm - ret <- rbind(ret, current_row) - } - - return(ret) -} diff --git a/R/tbl_shift.R b/R/tbl_shift.R index 1c0f031e..3b06f46d 100644 --- a/R/tbl_shift.R +++ b/R/tbl_shift.R @@ -263,7 +263,7 @@ tbl_shift <- function(data, x$inputs <- tbl_shift_inputs x$call_list <- list(tbl_shift = match.call()) - x %>% + x |> structure(., class = c("tbl_shift", class(.))) } diff --git a/_pkgdown.yml b/_pkgdown.yml index a2921271..3015c9d7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -45,7 +45,5 @@ reference: contents: - process_survfit - gg_km - - annot_surv_med - - annot_at_risk - - get_cox_pairwise_tbl - - annot_cox_ph + - annotate_gg_km + - get_cox_pairwise_df diff --git a/inst/WORDLIST b/inst/WORDLIST index 0db4adb4..123b296b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -8,10 +8,12 @@ Kaplan Ns ORCID RStudio +Recode Rua SOCs Tidyverse cardx +customizations de flextable funder @@ -19,10 +21,9 @@ ggplot gtsummary pharma pre +recodes rlang's survfit tbl -tte tidyselect unstratified -customizations diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd new file mode 100644 index 00000000..2af992a5 --- /dev/null +++ b/man/annotate_gg_km.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/annotate_gg_km.R +\name{annotate_gg_km} +\alias{annotate_gg_km} +\alias{annotate_surv_med} +\alias{annotate_risk} +\alias{annotate_coxph} +\title{Annotate Kaplan-Meier Plot} +\usage{ +annotate_surv_med(gg_plt, fit_km, ...) + +annotate_risk( + gg_plt, + fit_km, + title = "Patients at Risk:", + rel_height_plot = 0.75, + xlab = "Days", + ... +) + +annotate_coxph(gg_plt, coxph_tbl, ...) +} +\arguments{ +\item{gg_plt}{(\code{ggplot2} or \code{cowplot})\cr +The primary plot object (either a \code{ggplot2} or \code{cowplot} object) of the Kaplan-Meier plot to which +the Cox-PH annotation table will be added.} + +\item{fit_km}{(\code{survfit})\cr +A fitted Kaplan-Meier object of class \code{survfit} (from the \code{survival} package). This object contains +the necessary survival data used to calculate the numbers at risk.} + +\item{...}{Additional arguments passed to the control list for the annotation box. +These arguments override the default values. +Accepted arguments include: +\itemize{ +\item \code{x} (\code{numeric}): X-coordinate for the box anchor position (0 to 1). Default is \code{0.29}. +\item \code{y} (\code{numeric}): Y-coordinate for the box anchor position (0 to 1). Default is \code{0.51}. +\item \code{w} (\code{numeric}): Width of the annotation box (0 to 1). Default is \code{0.4}. +\item \code{h} (\code{numeric}): Height of the annotation box (0 to 1). Default is \code{0.125}. +}} + +\item{title}{(\code{string})\cr +A single logical value indicating whether to include a above the table. Defaults to +\verb{""Patients at Risk:""}. If \code{NULL}, no title is added.} + +\item{rel_height_plot}{(\code{numeric})\cr +A single numeric value defining the \strong{relative height} of the main Kaplan-Meier plot area compared +to the 'at-risk' table. This value should be between 0 and 1, where a value closer to 1 gives the main plot +more vertical space. Defaults to \code{0.75}.} + +\item{xlab}{(\code{character})\cr +A single character string for the \strong{x-axis label} on the 'at-risk' table. This typically represents +time (e.g., "Time (Days)").} + +\item{coxph_tbl}{(\code{data.frame})\cr +A data frame containing the pre-calculated Cox-PH results, typically from a function like \code{get_cox_pairwise_df}. +This data is used to generate the annotation table content.} +} +\value{ +The function \code{annotate_surv_med} returns a \code{cowplot} object with the median survival table annotation +added, ready for final display or saving. + +The function \code{annotate_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' +table. + +The function \code{annotate_coxph} returns a \code{cowplot} object with the Cox-PH table annotation added. +} +\description{ +These functions provide capabilities to annotate Kaplan-Meier plots (\code{\link[=gg_km]{gg_km()}}) with additional summary tables, +including median survival times, numbers at risk, and cox proportional hazards results. +The annotations are added using the \code{cowplot} package for flexible placement. +} +\section{Functions}{ +\itemize{ +\item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a median survival time summary table as an +annotation box. + +\item \code{annotate_risk()}: The function \code{annotate_risk} adds a "Numbers at Risk" table below a +Kaplan-Meier plot (\code{\link[=gg_km]{gg_km()}}) using \code{cowplot::plot_grid}. + +\item \code{annotate_coxph()}: The function \code{annotate_coxph()} adds a Cox Proportional Hazards summary table created by +the function \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} as an annotation box. + +}} +\examples{ +# Preparing the Kaplan-Meier Plot +use_lung <- lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) + +fit_kmg01 <- survfit(Surv(time, status) ~ arm, use_lung) +surv_plot_data <- process_survfit(fit_kmg01) + +plt_kmg01 <- gg_km(surv_plot_data) + +# Annotate Kaplan-Meier Plot with Median Survival Table +annotate_surv_med(plt_kmg01, fit_kmg01) + +# Annotate Plot with Numbers at Risk Table +annotate_risk(plt_kmg01, fit_kmg01) + +# Annotate Kaplan-Meier Plot with Cox-PH Table +coxph_tbl <- get_cox_pairwise_df(formula, data = use_lung, arm = "arm", ref_group = "A") +annotate_coxph(plt_kmg01, coxph_tbl) + +} +\seealso{ +\code{\link[=gg_km]{gg_km()}}, \code{\link[=process_survfit]{process_survfit()}}, and \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} for related functionalities. +} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd new file mode 100644 index 00000000..b22ef410 --- /dev/null +++ b/man/get_cox_pairwise_df.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cox_pairwise_df.R +\name{get_cox_pairwise_df} +\alias{get_cox_pairwise_df} +\title{Generate Table of Pairwise Cox-PH and Log-Rank Results} +\usage{ +get_cox_pairwise_df(model_formula, data, arm, ref_group = NULL) +} +\arguments{ +\item{model_formula}{(\code{formula})\cr +A \code{formula} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} + +\item{data}{(\code{data.frame})\cr +A \code{data.frame} containing the survival data, including time, status, and the arm variable.} + +\item{arm}{(\code{character})\cr +A single character string specifying the name of the column in \code{data} that contains the grouping/treatment +\strong{arm variable}. This column \strong{must be a factor} for correct stratification and comparison.} + +\item{ref_group}{(\code{character} or \code{NULL})\cr +A single character string specifying the level of the \code{arm} variable to be used as the \strong{reference group} for +all pairwise comparisons. If \code{NULL} (the default), the \strong{first unique level} of the \code{arm} column is automatically +selected as the reference group.} +} +\value{ +A \code{data.frame} with the results of the pairwise comparisons. The columns include: +\itemize{ +\item \code{arm}: (rownames of the \code{data.frame}) The comparison arm (group) being tested against the reference group. +\item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +\item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with +values formatted to two decimal places. +\item \code{pval}: The log-rank p-value for the comparison. +} +} +\description{ +This function performs pairwise comparisons of treatment arms using the \strong{Cox Proportional Hazards model} and +calculates the corresponding \strong{log-rank p-value}. Each comparison tests a non-reference group against a specified +reference group. +} +\details{ +The function iterates through each unique arm (excluding the reference group). For each iteration, it +filters the data to include only the current comparison arm and the reference arm, and then: +\itemize{ +\item Fits a Cox model using \code{survival::coxph}. +\item Performs a log-rank test using \code{survival::survdiff}. +} +The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is +extracted from the log-rank test. +} +\examples{ +# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +# and 'arm' is the treatment group) +library(dplyr) # For better data handling + +# Prepare data in a modern dplyr-friendly way +surv_data <- lung |> + mutate( + arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), + status = status - 1 # Convert status to 0/1 + ) |> + filter(if_all(everything(), ~ !is.na(.))) + +formula <- Surv(time, status) ~ arm +results_tbl <- get_cox_pairwise_df( + model_formula = formula, + data = surv_data, + arm = "arm", + ref_group = "A" +) +print(results_tbl) + +} +\seealso{ +\code{annotate_gg_km()}, \code{gg_km()}, and the \code{survival} package functions \code{survival::coxph} and +\code{survival::survdiff}. +} diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd deleted file mode 100644 index db7eb1cb..00000000 --- a/man/get_cox_pairwise_tbl.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gg_km_utils.R -\name{get_cox_pairwise_tbl} -\alias{get_cox_pairwise_tbl} -\title{Perform Pairwise Cox Proportional Hazards Regression} -\usage{ -get_cox_pairwise_tbl(model_formula, data, arm, ref_group = NULL) -} -\arguments{ -\item{model_formula}{A \link[stats:formula]{stats::formula} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} - -\item{data}{A \code{data.frame} containing the survival data, including time, status, and the arm variable.} - -\item{arm}{A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable).} - -\item{ref_group}{A character string specifying the level of the \code{arm} variable to be used as the \strong{reference group} for all pairwise comparisons. If \code{NULL} (the default), the \strong{first unique level} of the \code{arm} column is used as the reference group.} -} -\value{ -A \code{data.frame} with the results of the pairwise comparisons. The columns include: -\itemize{ -\item \code{arm}: The comparison arm being tested against the reference group. -\item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. -\item \code{ci}: The 95\\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. -\item \code{pval}: The log-rank p-value for the comparison. -} -} -\description{ -This function performs a pairwise comparison of treatment arms using the \strong{Cox proportional hazards model} and calculates the corresponding \strong{log-rank p-value}. Each comparison is made between a specified reference group and all other comparison groups in the dataset. -} -\details{ -The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. -} -\examples{ -# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), -# and 'arm' is the treatment group) -library(survival) -use_lung <- lung -use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) -use_lung$status <- use_lung$status - 1 # Convert status to 0/1 -use_lung <- na.omit(use_lung) - -formula <- Surv(time, status) ~ arm -results_tbl <- get_cox_pairwise_tbl( - model_formula = formula, - data = use_lung, - arm = "arm", - ref_group = "A" -) -print(results_tbl) - -} diff --git a/man/gg_km.Rd b/man/gg_km.Rd index ec9ab75e..cfe3e0a8 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -3,12 +3,9 @@ \name{gg_km} \alias{gg_km} \alias{process_survfit} -\alias{annot_surv_med} -\alias{annot_cox_ph} -\alias{annot_at_risk} \title{Kaplan-Meier Plot} \usage{ -process_survfit(fit_km, armval = "All", max_time = NULL) +process_survfit(fit_km, strata_levels = "All", max_time = NULL) gg_km( surv_plot_data, @@ -19,54 +16,29 @@ gg_km( size = 2, max_time = NULL, xticks = NULL, - xlab = "Days", yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), ylim = NULL, font_size = 10, legend_pos = NULL ) - -annot_surv_med( - gg_plt, - fit_km, - control_annot_surv_med = control_surv_med_annot(), - font_size = 10 -) - -annot_cox_ph( - gg_plt, - coxph_tbl, - control_annot_coxph = control_coxph_annot(), - font_size = 10 -) - -annot_at_risk( - gg_plt, - fit_km, - font_size = 10, - annot_at_risk_title = TRUE, - rel_height_plot = 0.75, - xlab = "Days" -) } \arguments{ -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} -\item{armval}{(\code{string})\cr +\item{strata_levels}{(\code{string})\cr A single character string used as the strata level if the input \code{fit_km} object has no strata (e.g., \code{"All"}).} \item{max_time}{(\code{numeric})\cr -A single numeric value defining the \strong{maximum time point} to display on the x-axis. Data points beyond this time will be clipped.} +A single numeric value defining the \strong{maximum time point} to display on the x-axis.} \item{surv_plot_data}{(\code{data.frame})\cr A data frame containing the pre-processed survival data, ready for plotting. This data should be equivalent to the output of \code{process_survfit}.} \item{lty}{(\code{numeric} or \code{NULL})\cr -A numeric vector of \strong{line types} (e.g., \code{1} for solid, \code{2} for dashed) for the survival curves, or \code{NULL} for \code{ggplot2} defaults. -The length should match the number of arms/groups.} +A numeric vector of \strong{line types} (e.g., \code{1} for solid, \code{2} for dashed) for the survival curves, or \code{NULL} for +\code{ggplot2} defaults. The length should match the number of arms/groups.} \item{lwd}{(\code{numeric})\cr A single numeric value specifying the \strong{line width} for the survival curves.} @@ -81,53 +53,33 @@ A single numeric value specifying the \strong{plotting character} (point shape c A single numeric value specifying the \strong{size} of the censoring marks.} \item{xticks}{(\code{numeric} or \code{NULL})\cr -A numeric vector of explicit \strong{x-axis tick positions}, or a single numeric value representing the \strong{interval} between ticks, or \code{NULL} for automatic \code{ggplot2} scaling.} - -\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} +A numeric vector of explicit \strong{x-axis tick positions}, or a single numeric value representing the \strong{interval} +between ticks, or \code{NULL} for automatic \code{ggplot2} scaling.} \item{yval}{(\code{character})\cr -A single character string, either \code{"Survival"} or \code{"Failure"} to plot the corresponding probability. Case sensitive.} - -\item{ylab}{(\code{character})\cr -A single character string for the \strong{y-axis label}.} +A single character string, either \code{"Survival"} or \code{"Failure"} to plot the corresponding probability.} \item{ylim}{(\code{numeric})\cr A \strong{numeric vector of length 2} defining the lower and upper limits of the y-axis (e.g., \code{c(0, 1)}).} -\item{font_size}{Numeric, base font size for the table.} +\item{font_size}{(\code{numeric})\cr +A single numeric value specifying the \strong{base font size} for the plot theme elements.} \item{legend_pos}{(\code{numeric} or \code{NULL})\cr -A \strong{numeric vector of length 2} defining the \strong{legend position} as (x, y) coordinates relative to the plot area (ranging from 0 to 1), or \code{NULL} for external, automatic placement.} - -\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} - -\item{control_annot_surv_med}{A list of control parameters for the annotation box.} - -\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} - -\item{control_annot_coxph}{A list of control parameters for the annotation box.} - -\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} - -\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} +A \strong{numeric vector of length 2} defining the \strong{legend position} as (x, y) coordinates relative to the plot +area (ranging from 0 to 1), or \code{NULL} for automatic placement.} } \value{ The function \code{process_survfit} returns a data frame containing the survival curve steps, confidence intervals, and censoring info. The function \code{gg_km} returns a \code{ggplot2} object of the KM plot. - -The \code{annot_surv_med} function returns a \code{cowplot} object with the median survival table annotation added. - -The function \code{annot_surv_med} returns a \code{cowplot} object with the Cox-PH table annotation added. - -The function \code{annot_at_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. } \description{ This set of functions facilitates the creation of Kaplan-Meier survival plots using \code{ggplot2}. Use \code{process_survfit()} to prepare the survival data from a fitted \code{survfit} object, and then \code{gg_km()} to generate the Kaplan-Meier plot with various customization options. Additional functions -like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annot_at_risk()} allow for adding summary tables and +like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annotate_risk()} allow for adding summary tables and annotations to the plot. } \details{ @@ -142,14 +94,6 @@ suitable for plotting a Kaplan-Meier curve with \code{ggplot2}. Time zero is als \item \code{gg_km()}: creates a Kaplan-Meier survival curve, with support for various customizations like censoring marks, Confidence Intervals (CIs), and axis control. -\item \code{annot_surv_med()}: The \code{annot_surv_med} function adds a median survival time summary table as an annotation box on a -Kaplan-Meier plot using \code{cowplot}. - -\item \code{annot_cox_ph()}: The function \code{annot_cox_ph} adds a Cox Proportional Hazards summary table created by the function \code{\link{get_cox_pairwise_tbl}} as an annotation box on a -Kaplan-Meier plot using \code{cowplot}. - -\item \code{annot_at_risk()}: The function \code{annot_at_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. - }} \examples{ # Data preparation for KM plot @@ -167,9 +111,7 @@ surv_plot_data <- process_survfit(fit_kmg01) head(surv_plot_data) # Example of making the KM plot -plt_kmg01 <- gg_km(surv_plot_data, - xlab = "Time (Days)" -) +plt_kmg01 <- gg_km(surv_plot_data) # Confidence Interval as Ribbon plt_kmg01 + @@ -179,14 +121,9 @@ plt_kmg01 + plt_kmg01 + ggplot2::labs(title = "title", caption = "footnotes") -# Annotate Kaplan-Meier Plot with Median Survival Table -annot_surv_med(plt_kmg01, fit_kmg01) - -# Annotate Kaplan-Meier Plot with Cox-PH Table -coxph_tbl <- get_cox_pairwise_tbl(formula, data = use_lung, arm = "arm", ref_group = "A") -annot_cox_ph(plt_kmg01, coxph_tbl) - -# Annotate Plot with Numbers at Risk Table -annot_at_risk(plt_kmg01, fit_kmg01) +# Changing xlab and ylab +plt_kmg01 + + xlab("Another Day") + + ylab("THE Survival Probability") } diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd index fb2b827f..17812e95 100644 --- a/man/h_tbl_median_surv.Rd +++ b/man/h_tbl_median_surv.Rd @@ -4,12 +4,14 @@ \alias{h_tbl_median_surv} \title{Median Survival Summary Table} \usage{ -h_tbl_median_surv(fit_km, armval = "All") +h_tbl_median_surv(fit_km, strata_levels = "All") } \arguments{ \item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} -\item{armval}{Character string to use as the row name if \code{fit_km} has no strata (e.g., "All").} +\item{strata_levels}{(\code{string})\cr +A single character string used as the strata level if the input \code{fit_km} object +has no strata (e.g., \code{"All"}).} } \value{ A data frame with columns "N", "Median", and the confidence interval label. diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R new file mode 100644 index 00000000..083f3dcd --- /dev/null +++ b/tests/testthat/test-gg_km.R @@ -0,0 +1,38 @@ +test_that("gg_km() works with default inputs", { + anl <- cards::ADTTE |> + dplyr::mutate(is_event = CNSR == 0) + by <- "TRTP" + anl[[by]] <- factor(anl[[by]], levels = c( + "Placebo", + "Xanomeline Low Dose", + "Xanomeline High Dose" + )) + group_sym <- rlang::sym(by) + model_formula <- rlang::new_formula( + lhs = rlang::expr(Surv(AVAL, is_event)), + rhs = rlang::expr(!!group_sym) + ) + + fit_kmg01 <- survival::survfit(model_formula, anl) + + expect_no_error( + surv_plot_data <- process_survfit(fit_kmg01) + ) + + expect_no_error( + suppressWarnings( + coxph_tbl <- get_cox_pairwise_df( + model_formula, + data = anl, + arm = by + ) + ) + ) + + expect_no_error( + plt_kmg01 <- gg_km(surv_plot_data) |> + annotate_surv_med(fit_kmg01) |> + annotate_coxph(coxph_tbl) |> + annotate_risk(fit_kmg01) + ) +}) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R deleted file mode 100644 index fd05c0ea..00000000 --- a/tests/testthat/test-gkm.R +++ /dev/null @@ -1,43 +0,0 @@ -skip_on_cran() - -anl <- cards::ADTTE |> - dplyr::mutate(is_event = CNSR == 0) -by <- "TRTP" -anl[[by]] <- factor(anl[[by]], levels = c( - "Placebo", - "Xanomeline Low Dose", - "Xanomeline High Dose" -)) - -test_that("test gkm() works", { - GROUP_SYM <- rlang::sym(by) - model_formula <- rlang::new_formula( - lhs = rlang::expr(Surv(AVAL, is_event)), - rhs = rlang::expr(!!GROUP_SYM) - ) - - fit_kmg01 <- survival::survfit(model_formula, anl) - - - expect_no_error(surv_plot_data <- process_survfit(fit_kmg01)) - - expect_no_error( - suppressWarnings( - coxph_tbl <- get_cox_pairwise_tbl( - model_formula, - data = anl, - arm = by - ) - ) - ) - - expect_no_error( - plt_kmg01 <- gg_km(surv_plot_data, - xlab = "Time (Days)", - ylim = c(0.9, 1) - ) %>% - annot_surv_med(fit_kmg01) %>% - annot_cox_ph(coxph_tbl) %>% - annot_at_risk(fit_kmg01) - ) -}) From 786f40b350480504894aac6e18075aa4da535b2e Mon Sep 17 00:00:00 2001 From: melkiades Date: Mon, 8 Dec 2025 17:22:20 +0100 Subject: [PATCH 35/51] fix --- R/tbl_shift.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tbl_shift.R b/R/tbl_shift.R index 3b06f46d..1c0f031e 100644 --- a/R/tbl_shift.R +++ b/R/tbl_shift.R @@ -263,7 +263,7 @@ tbl_shift <- function(data, x$inputs <- tbl_shift_inputs x$call_list <- list(tbl_shift = match.call()) - x |> + x %>% structure(., class = c("tbl_shift", class(.))) } From 3b3b99199c9eb87aca7fbb267b5a57811a2aa245 Mon Sep 17 00:00:00 2001 From: melkiades Date: Mon, 8 Dec 2025 17:35:39 +0100 Subject: [PATCH 36/51] fix --- R/annotate_gg_km.R | 2 +- R/get_cox_pairwise_df.R | 2 +- R/gg_km.R | 2 +- man/annotate_gg_km.Rd | 2 +- man/get_cox_pairwise_df.Rd | 2 +- man/gg_km.Rd | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index c4b6051c..73f0e8ff 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -9,7 +9,7 @@ #' #' @examples #' # Preparing the Kaplan-Meier Plot -#' use_lung <- lung +#' use_lung <- survival::lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index cdaa4ceb..9f7980db 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -44,7 +44,7 @@ #' library(dplyr) # For better data handling #' #' # Prepare data in a modern dplyr-friendly way -#' surv_data <- lung |> +#' surv_data <- survival::lung |> #' mutate( #' arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), #' status = status - 1 # Convert status to 0/1 diff --git a/R/gg_km.R b/R/gg_km.R index 4717c038..44492cb2 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -30,7 +30,7 @@ NULL #' #' @examples #' # Data preparation for KM plot -#' use_lung <- lung +#' use_lung <- survival::lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 2af992a5..542f3c58 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -84,7 +84,7 @@ the function \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} as an ann }} \examples{ # Preparing the Kaplan-Meier Plot -use_lung <- lung +use_lung <- survival::lung use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index b22ef410..fee3c83b 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -53,7 +53,7 @@ extracted from the log-rank test. library(dplyr) # For better data handling # Prepare data in a modern dplyr-friendly way -surv_data <- lung |> +surv_data <- survival::lung |> mutate( arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), status = status - 1 # Convert status to 0/1 diff --git a/man/gg_km.Rd b/man/gg_km.Rd index cfe3e0a8..a266577b 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -97,7 +97,7 @@ support for various customizations like censoring marks, Confidence Intervals ( }} \examples{ # Data preparation for KM plot -use_lung <- lung +use_lung <- survival::lung use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) From 0de370e7e1dfd3aac778a5472f0181c60adb0034 Mon Sep 17 00:00:00 2001 From: melkiades Date: Tue, 9 Dec 2025 10:55:45 +0100 Subject: [PATCH 37/51] fix --- R/annotate_gg_km.R | 2 +- R/get_cox_pairwise_df.R | 2 +- R/gg_km.R | 4 ++-- man/annotate_gg_km.Rd | 2 +- man/get_cox_pairwise_df.Rd | 2 +- man/gg_km.Rd | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 73f0e8ff..1ddc0ba1 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -14,7 +14,7 @@ #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' -#' fit_kmg01 <- survfit(Surv(time, status) ~ arm, use_lung) +#' fit_kmg01 <- survival::survfit(survival::Surv(time, status) ~ arm, use_lung) #' surv_plot_data <- process_survfit(fit_kmg01) #' #' plt_kmg01 <- gg_km(surv_plot_data) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 9f7980db..1a1f921e 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -51,7 +51,7 @@ #' ) |> #' filter(if_all(everything(), ~ !is.na(.))) #' -#' formula <- Surv(time, status) ~ arm +#' formula <- survival::Surv(time, status) ~ arm #' results_tbl <- get_cox_pairwise_df( #' model_formula = formula, #' data = surv_data, diff --git a/R/gg_km.R b/R/gg_km.R index 44492cb2..24c0f117 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -36,8 +36,8 @@ NULL #' use_lung <- na.omit(use_lung) #' #' # Fit Kaplan-Meier model -#' formula <- Surv(time, status) ~ arm -#' fit_kmg01 <- survfit(formula, use_lung) +#' formula <- survival::Surv(time, status) ~ arm +#' fit_kmg01 <- survival::survfit(formula, use_lung) #' #' # Process survfit data for plotting #' surv_plot_data <- process_survfit(fit_kmg01) diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 542f3c58..7e7c7624 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -89,7 +89,7 @@ use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) -fit_kmg01 <- survfit(Surv(time, status) ~ arm, use_lung) +fit_kmg01 <- survival::survfit(survival::Surv(time, status) ~ arm, use_lung) surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index fee3c83b..beaad285 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -60,7 +60,7 @@ surv_data <- survival::lung |> ) |> filter(if_all(everything(), ~ !is.na(.))) -formula <- Surv(time, status) ~ arm +formula <- survival::Surv(time, status) ~ arm results_tbl <- get_cox_pairwise_df( model_formula = formula, data = surv_data, diff --git a/man/gg_km.Rd b/man/gg_km.Rd index a266577b..bff16afe 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -103,8 +103,8 @@ use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) # Fit Kaplan-Meier model -formula <- Surv(time, status) ~ arm -fit_kmg01 <- survfit(formula, use_lung) +formula <- survival::Surv(time, status) ~ arm +fit_kmg01 <- survival::survfit(formula, use_lung) # Process survfit data for plotting surv_plot_data <- process_survfit(fit_kmg01) From 1448707235ac31ffa8f2b8540a106a2fad74bbff Mon Sep 17 00:00:00 2001 From: melkiades Date: Tue, 9 Dec 2025 15:17:26 +0100 Subject: [PATCH 38/51] fix --- R/annotate_gg_km.R | 3 ++- man/annotate_gg_km.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 1ddc0ba1..a523100f 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -14,7 +14,8 @@ #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' -#' fit_kmg01 <- survival::survfit(survival::Surv(time, status) ~ arm, use_lung) +#' formula <- survival::Surv(time, status) ~ arm +#' fit_kmg01 <- survival::survfit(formula, use_lung) #' surv_plot_data <- process_survfit(fit_kmg01) #' #' plt_kmg01 <- gg_km(surv_plot_data) diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 7e7c7624..e658ce2d 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -89,7 +89,8 @@ use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) -fit_kmg01 <- survival::survfit(survival::Surv(time, status) ~ arm, use_lung) +formula <- survival::Surv(time, status) ~ arm +fit_kmg01 <- survival::survfit(formula, use_lung) surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) From 98a5948a957e416edf399fc99f75e9c0300bee71 Mon Sep 17 00:00:00 2001 From: melkiades Date: Tue, 9 Dec 2025 15:45:18 +0100 Subject: [PATCH 39/51] final fix --- R/gg_km.R | 4 ++-- man/gg_km.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/gg_km.R b/R/gg_km.R index 24c0f117..ee706c4e 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -153,8 +153,8 @@ process_survfit <- function(fit_km, #' #' # Changing xlab and ylab #' plt_kmg01 + -#' xlab("Another Day") + -#' ylab("THE Survival Probability") +#' ggplot2::xlab("Another Day") + +#' ggplot2::ylab("THE Survival Probability") #' #' @export gg_km <- function(surv_plot_data, diff --git a/man/gg_km.Rd b/man/gg_km.Rd index bff16afe..0997c5da 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -123,7 +123,7 @@ plt_kmg01 + # Changing xlab and ylab plt_kmg01 + - xlab("Another Day") + - ylab("THE Survival Probability") + ggplot2::xlab("Another Day") + + ggplot2::ylab("THE Survival Probability") } From b4716e219ac7f170afef66083dffc6bc99a0e930 Mon Sep 17 00:00:00 2001 From: melkiades Date: Tue, 9 Dec 2025 16:12:47 +0100 Subject: [PATCH 40/51] final fix 2 --- R/gg_km_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index 99a90995..76951a30 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -87,7 +87,7 @@ h_xticks <- function(data, xticks = NULL, max_time = NULL) { } else { labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) } - } else if (checkmate::test_number(xticks)) { + } else if (is.numeric(xticks) && length(xticks) == 1 && !is.na(xticks)) { if (is.null(max_time)) { seq(0, max(data$time), xticks) } else { From b09966db22f7dd9f72cfc0ea37cb0ee55d5ddaa4 Mon Sep 17 00:00:00 2001 From: melkiades Date: Wed, 10 Dec 2025 11:32:34 +0100 Subject: [PATCH 41/51] almost fixed --- R/annotate_gg_km.R | 10 ++--- R/gg_km_utils.R | 93 +++++++++++++++++++++++++++++----------------- man/df2gg.Rd | 5 ++- 3 files changed, 65 insertions(+), 43 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index a523100f..880dd91e 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -189,7 +189,8 @@ annotate_risk <- function(gg_plt, fit_km, title = "Patients at Risk:", gg_at_risk <- df2gg( at_risk_tbl, font_size = eargs$font_size, col_labels = FALSE, hline = FALSE, - colwidths = rep(1, ncol(at_risk_tbl)) + colwidths = rep(1, ncol(at_risk_tbl)), + add_proper_xaxis = TRUE ) + ggplot2::labs(title = if (!is.null(title)) title else NULL, x = xlab) + ggplot2::theme_bw(base_size = eargs$font_size) + @@ -204,15 +205,10 @@ annotate_risk <- function(gg_plt, fit_km, title = "Patients at Risk:", axis.line.x = ggplot2::element_line() ) + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) - gg_at_risk <- suppressMessages( - gg_at_risk + - ggplot2::scale_x_continuous(expand = c(0.1, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + - ggplot2::scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) - ) gg_plt <- cowplot::plot_grid( gg_plt, gg_at_risk, - align = "v", axis = "tblr", ncol = 1, + align = "vh", axis = "b", ncol = 1, rel_heights = c(rel_height_plot, 1 - rel_height_plot) ) gg_plt diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index 76951a30..9b55563c 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -9,12 +9,13 @@ #' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). #' @param hline Logical, whether to draw a horizontal line below the column labels. #' @param bg_fill Optional color string for the plot background. +#' @param add_proper_xaxis Logical, whether to add a proper x-axis with column values. #' #' @return A \code{ggplot2} object representing the table. #' #' @keywords internal df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, - col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL, add_proper_xaxis = FALSE) { df <- as.data.frame(apply(df, 1:2, function(x) { if (is.na(x)) { "NA" @@ -30,41 +31,63 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) } tot_width <- sum(colwidths) - res <- ggplot2::ggplot(data = df) + - ggplot2::theme_void() + - ggplot2::scale_x_continuous(limits = c( - 0, - tot_width - )) + - ggplot2::scale_y_continuous(limits = c(1, nrow(df))) - if (!is.null(bg_fill)) { - res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) - } - if (hline) { - res <- res + ggplot2::annotate("segment", - x = 0 + 0.2 * colwidths[2], - xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - - 0.5, yend = nrow(df) - 0.5 - ) - } - for (i in seq_len(ncol(df))) { - line_pos <- c( - if (i == 1) { - 0 - } else { - sum(colwidths[1:(i - 1)]) - }, - sum(colwidths[1:i]) - ) - res <- res + ggplot2::annotate("text", - x = mean(line_pos), y = rev(seq_len(nrow(df))), - label = df[, i], size = font_size / .pt, fontface = if (col_labels) { - c(col_lab_fontface, rep("plain", nrow(df) - 1)) - } else { - rep("plain", nrow(df)) - } - ) + if (add_proper_xaxis) { + df_long <- df |> + # 1. Ensure the row names ('A', 'B', 'C') are a column named 'row_name' + tibble::rownames_to_column("row_name") |> + # 2. Pivot the remaining columns (starting from '0' to the end) longer + tidyr::pivot_longer( + cols = -row_name, # Select all columns EXCEPT 'row_name' + names_to = "col_name", # Name the new column containing the old column headers + values_to = "value" # Name the new column containing the data values + ) |> + dplyr::arrange(row_name, col_name) |> + mutate( + col_name = as.numeric(col_name), + row_name = factor(row_name, levels = unique(row_name)) + ) + res <- ggplot2::ggplot(data = df_long) + + ggplot2::theme_void() + + ggplot2::annotate("text", x = df_long$col_name, y = df_long$row_name, label = df_long$value, size = font_size / .pt) + + } else { + res <- ggplot2::ggplot(data = df) + + ggplot2::theme_void() + + ggplot2::scale_x_continuous(limits = c( + 0, + tot_width + )) + + ggplot2::scale_y_continuous(limits = c(1, nrow(df))) + if (!is.null(bg_fill)) { + res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) + } + if (hline) { + res <- res + ggplot2::annotate("segment", + x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5 + ) + } + for (i in seq_len(ncol(df))) { + line_pos <- c( + if (i == 1) { + 0 + } else { + sum(colwidths[1:(i - 1)]) + }, + sum(colwidths[1:i]) + ) + res <- res + ggplot2::annotate("text", + x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size / .pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } else { + rep("plain", nrow(df)) + } + ) + } } + res } diff --git a/man/df2gg.Rd b/man/df2gg.Rd index 22a76862..ace83168 100644 --- a/man/df2gg.Rd +++ b/man/df2gg.Rd @@ -11,7 +11,8 @@ df2gg( col_labels = TRUE, col_lab_fontface = "bold", hline = TRUE, - bg_fill = NULL + bg_fill = NULL, + add_proper_xaxis = FALSE ) } \arguments{ @@ -28,6 +29,8 @@ df2gg( \item{hline}{Logical, whether to draw a horizontal line below the column labels.} \item{bg_fill}{Optional color string for the plot background.} + +\item{add_proper_xaxis}{Logical, whether to add a proper x-axis with column values.} } \value{ A \code{ggplot2} object representing the table. From 4a5a2589e9731969a9231b00b66ce6baca611f22 Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 11 Dec 2025 15:34:34 +0100 Subject: [PATCH 42/51] fix order of values --- R/gg_km_utils.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index 9b55563c..e82a3932 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -44,12 +44,14 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, dplyr::arrange(row_name, col_name) |> mutate( col_name = as.numeric(col_name), - row_name = factor(row_name, levels = unique(row_name)) + row_name = factor(row_name, levels = row.names(df)) ) res <- ggplot2::ggplot(data = df_long) + ggplot2::theme_void() + - ggplot2::annotate("text", x = df_long$col_name, y = df_long$row_name, label = df_long$value, size = font_size / .pt) - + ggplot2::annotate("text", + x = df_long$col_name, y = rev(df_long$row_name), # why rev? + label = df_long$value, size = font_size / .pt + ) } else { res <- ggplot2::ggplot(data = df) + ggplot2::theme_void() + From 4c14a69584302dfb1bfd9c1b98e9942e9722b912 Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 11 Dec 2025 15:42:09 +0100 Subject: [PATCH 43/51] example --- R/annotate_gg_km.R | 6 ++++++ man/annotate_gg_km.Rd | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 880dd91e..f2eb74dc 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -133,6 +133,12 @@ annotate_surv_med <- function(gg_plt, fit_km, ...) { #' # Annotate Plot with Numbers at Risk Table #' annotate_risk(plt_kmg01, fit_kmg01) #' +#' # Change order of y-axis (arm) +#' use_lung2 <- use_lung +#' use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) +#' fit_kmg01 <- survival::survfit(formula, use_lung2) +#' annotate_risk(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order +#' #' @export annotate_risk <- function(gg_plt, fit_km, title = "Patients at Risk:", rel_height_plot = 0.75, xlab = "Days", diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index e658ce2d..2ba11f17 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -101,6 +101,12 @@ annotate_surv_med(plt_kmg01, fit_kmg01) # Annotate Plot with Numbers at Risk Table annotate_risk(plt_kmg01, fit_kmg01) +# Change order of y-axis (arm) +use_lung2 <- use_lung +use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) +fit_kmg01 <- survival::survfit(formula, use_lung2) +annotate_risk(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order + # Annotate Kaplan-Meier Plot with Cox-PH Table coxph_tbl <- get_cox_pairwise_df(formula, data = use_lung, arm = "arm", ref_group = "A") annotate_coxph(plt_kmg01, coxph_tbl) From 1afde94af772a09c098f13e659b1e4b08cd6e967 Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 11 Dec 2025 15:58:02 +0100 Subject: [PATCH 44/51] remove tibble --- R/gg_km_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index e82a3932..eaf013d5 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -34,7 +34,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, if (add_proper_xaxis) { df_long <- df |> # 1. Ensure the row names ('A', 'B', 'C') are a column named 'row_name' - tibble::rownames_to_column("row_name") |> + mutate(row_name = row.names(df)) |> # 2. Pivot the remaining columns (starting from '0' to the end) longer tidyr::pivot_longer( cols = -row_name, # Select all columns EXCEPT 'row_name' From cde1575437efcacd1bd72ffce00f32b71940235a Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 11 Dec 2025 16:04:37 +0100 Subject: [PATCH 45/51] fix docs --- R/annotate_gg_km.R | 166 ++++++++++++++++++++---------------------- man/annotate_gg_km.Rd | 51 +++++++------ 2 files changed, 104 insertions(+), 113 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index f2eb74dc..654179ae 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -5,33 +5,8 @@ #' including median survival times, numbers at risk, and cox proportional hazards results. #' The annotations are added using the `cowplot` package for flexible placement. #' -#' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for related functionalities. -#' -#' @examples -#' # Preparing the Kaplan-Meier Plot -#' use_lung <- survival::lung -#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) -#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 -#' use_lung <- na.omit(use_lung) -#' -#' formula <- survival::Surv(time, status) ~ arm -#' fit_kmg01 <- survival::survfit(formula, use_lung) -#' surv_plot_data <- process_survfit(fit_kmg01) -#' -#' plt_kmg01 <- gg_km(surv_plot_data) -#' -#' @name annotate_gg_km -NULL - -#' @describeIn annotate_gg_km The `annotate_surv_med` function adds a median survival time summary table as an -#' annotation box. -#' #' @param gg_plt (`ggplot2` or `cowplot`)\cr -#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot to which the median -#' survival table annotation will be added. -#' @param fit_km (`survfit`)\cr -#' A fitted Kaplan-Meier object of class `survfit` (from the `survival` package). This object contains the necessary -#' survival data used to calculate and generate the content displayed in the annotation table. +#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot. #' @param ... Additional arguments passed to the control list for the annotation box. #' These arguments override the default values. #' Accepted arguments include: @@ -50,73 +25,31 @@ NULL #' is \code{10}. #' } #' -#' @return The function `annotate_surv_med` returns a `cowplot` object with the median survival table annotation -#' added, ready for final display or saving. +#' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for related functionalities. #' #' @examples -#' # Annotate Kaplan-Meier Plot with Median Survival Table -#' annotate_surv_med(plt_kmg01, fit_kmg01) +#' # Preparing the Kaplan-Meier Plot +#' use_lung <- survival::lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) #' -#' @export -annotate_surv_med <- function(gg_plt, fit_km, ...) { - set_cli_abort_call() - default_eargs <- list( - x = 0.8, - y = 0.85, - w = 0.32, - h = 0.16, - font_size = 10, - fill = TRUE - ) - eargs <- list(...) - eargs <- utils::modifyList(default_eargs, eargs) - - # Checks - check_class(fit_km, "survfit") - check_class(gg_plt, c("gg", "ggplot", "cowplot")) - - # Check position/size (x, y, w, h, font_size) - Must be single non-missing numeric - for (arg_name in c("x", "y", "w", "h", "font_size")) { - check_numeric(eargs[[arg_name]]) - } - check_logical(eargs[["fill"]]) - - # Determine strata_levels for h_tbl_median_surv, assuming it's available in the calling environment or logic should - # be updated. For now, keeping as is, but this typically requires strata_levels or inferring it from fit_km - strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for strata_levels - - surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, strata_levels = strata_levels) - bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] - - gg_surv_med <- df2gg(surv_med_tbl, font_size = eargs[["font_size"]], colwidths = c(1, 1, 2), bg_fill = bg_fill) + - ggplot2::theme( - axis.text.y = ggplot2::element_text(size = eargs[["font_size"]], face = "italic", hjust = 1), - plot.margin = ggplot2::margin(0, 2, 0, 5) - ) + - ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) - gg_surv_med <- suppressMessages( - gg_surv_med + - ggplot2::scale_x_continuous(expand = c(0.025, 0)) + - ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) - ) +#' formula <- survival::Surv(time, status) ~ arm +#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' surv_plot_data <- process_survfit(fit_kmg01) +#' +#' plt_kmg01 <- gg_km(surv_plot_data) +#' +#' @name annotate_gg_km +NULL - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_surv_med, eargs[["x"]], eargs[["y"]], - width = eargs[["w"]], height = eargs[["h"]], - vjust = 0.5, hjust = 0.5 - ) - gg_plt -} #' @describeIn annotate_gg_km The function `annotate_risk` adds a "Numbers at Risk" table below a #' Kaplan-Meier plot ([gg_km()]) using `cowplot::plot_grid`. #' -#' @param gg_plt (`ggplot2` or `cowplot`)\cr -#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot. #' @param fit_km (`survfit`)\cr -#' A fitted Kaplan-Meier object of class `survfit` (from the `survival` package). This object contains -#' the necessary survival data used to calculate the numbers at risk. +#' A fitted Kaplan-Meier object of class `survfit` (from the `survival` package). This object contains the necessary +#' survival data used to calculate and generate the content displayed in the annotation table. #' @param title (`string`)\cr #' A single logical value indicating whether to include a above the table. Defaults to #' `""Patients at Risk:""`. If `NULL`, no title is added. @@ -129,6 +62,7 @@ annotate_surv_med <- function(gg_plt, fit_km, ...) { #' time (e.g., "Time (Days)"). #' @return The function `annotate_risk` returns a `cowplot` object combining the KM plot and the 'Numbers at Risk' #' table. +#' #' @examples #' # Annotate Plot with Numbers at Risk Table #' annotate_risk(plt_kmg01, fit_kmg01) @@ -220,13 +154,71 @@ annotate_risk <- function(gg_plt, fit_km, title = "Patients at Risk:", gg_plt } +#' @describeIn annotate_gg_km The `annotate_surv_med` function adds a median survival time summary table as an +#' annotation box. +#' +#' @return The function `annotate_surv_med` returns a `cowplot` object with the median survival table annotation +#' added, ready for final display or saving. +#' +#' @examples +#' # Annotate Kaplan-Meier Plot with Median Survival Table +#' annotate_surv_med(plt_kmg01, fit_kmg01) +#' +#' @export +annotate_surv_med <- function(gg_plt, fit_km, ...) { + set_cli_abort_call() + default_eargs <- list( + x = 0.8, + y = 0.85, + w = 0.32, + h = 0.16, + font_size = 10, + fill = TRUE + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + + # Checks + check_class(fit_km, "survfit") + check_class(gg_plt, c("gg", "ggplot", "cowplot")) + + # Check position/size (x, y, w, h, font_size) - Must be single non-missing numeric + for (arg_name in c("x", "y", "w", "h", "font_size")) { + check_numeric(eargs[[arg_name]]) + } + check_logical(eargs[["fill"]]) + + # Determine strata_levels for h_tbl_median_surv, assuming it's available in the calling environment or logic should + # be updated. For now, keeping as is, but this typically requires strata_levels or inferring it from fit_km + strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for strata_levels + + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, strata_levels = strata_levels) + bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = eargs[["font_size"]], colwidths = c(1, 1, 2), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = eargs[["font_size"]], face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, eargs[["x"]], eargs[["y"]], + width = eargs[["w"]], height = eargs[["h"]], + vjust = 0.5, hjust = 0.5 + ) + gg_plt +} #' @describeIn annotate_gg_km The function `annotate_coxph()` adds a Cox Proportional Hazards summary table created by #' the function [get_cox_pairwise_df()] as an annotation box. #' -#' @param gg_plt (`ggplot2` or `cowplot`)\cr -#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot to which -#' the Cox-PH annotation table will be added. #' @param coxph_tbl (`data.frame`)\cr #' A data frame containing the pre-calculated Cox-PH results, typically from a function like `get_cox_pairwise_df`. #' This data is used to generate the annotation table content. diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 2ba11f17..546863f6 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -2,13 +2,11 @@ % Please edit documentation in R/annotate_gg_km.R \name{annotate_gg_km} \alias{annotate_gg_km} -\alias{annotate_surv_med} \alias{annotate_risk} +\alias{annotate_surv_med} \alias{annotate_coxph} \title{Annotate Kaplan-Meier Plot} \usage{ -annotate_surv_med(gg_plt, fit_km, ...) - annotate_risk( gg_plt, fit_km, @@ -18,26 +16,17 @@ annotate_risk( ... ) +annotate_surv_med(gg_plt, fit_km, ...) + annotate_coxph(gg_plt, coxph_tbl, ...) } \arguments{ \item{gg_plt}{(\code{ggplot2} or \code{cowplot})\cr -The primary plot object (either a \code{ggplot2} or \code{cowplot} object) of the Kaplan-Meier plot to which -the Cox-PH annotation table will be added.} +The primary plot object (either a \code{ggplot2} or \code{cowplot} object) of the Kaplan-Meier plot.} \item{fit_km}{(\code{survfit})\cr -A fitted Kaplan-Meier object of class \code{survfit} (from the \code{survival} package). This object contains -the necessary survival data used to calculate the numbers at risk.} - -\item{...}{Additional arguments passed to the control list for the annotation box. -These arguments override the default values. -Accepted arguments include: -\itemize{ -\item \code{x} (\code{numeric}): X-coordinate for the box anchor position (0 to 1). Default is \code{0.29}. -\item \code{y} (\code{numeric}): Y-coordinate for the box anchor position (0 to 1). Default is \code{0.51}. -\item \code{w} (\code{numeric}): Width of the annotation box (0 to 1). Default is \code{0.4}. -\item \code{h} (\code{numeric}): Height of the annotation box (0 to 1). Default is \code{0.125}. -}} +A fitted Kaplan-Meier object of class \code{survfit} (from the \code{survival} package). This object contains the necessary +survival data used to calculate and generate the content displayed in the annotation table.} \item{title}{(\code{string})\cr A single logical value indicating whether to include a above the table. Defaults to @@ -52,17 +41,27 @@ more vertical space. Defaults to \code{0.75}.} A single character string for the \strong{x-axis label} on the 'at-risk' table. This typically represents time (e.g., "Time (Days)").} +\item{...}{Additional arguments passed to the control list for the annotation box. +These arguments override the default values. +Accepted arguments include: +\itemize{ +\item \code{x} (\code{numeric}): X-coordinate for the box anchor position (0 to 1). Default is \code{0.29}. +\item \code{y} (\code{numeric}): Y-coordinate for the box anchor position (0 to 1). Default is \code{0.51}. +\item \code{w} (\code{numeric}): Width of the annotation box (0 to 1). Default is \code{0.4}. +\item \code{h} (\code{numeric}): Height of the annotation box (0 to 1). Default is \code{0.125}. +}} + \item{coxph_tbl}{(\code{data.frame})\cr A data frame containing the pre-calculated Cox-PH results, typically from a function like \code{get_cox_pairwise_df}. This data is used to generate the annotation table content.} } \value{ -The function \code{annotate_surv_med} returns a \code{cowplot} object with the median survival table annotation -added, ready for final display or saving. - The function \code{annotate_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +The function \code{annotate_surv_med} returns a \code{cowplot} object with the median survival table annotation +added, ready for final display or saving. + The function \code{annotate_coxph} returns a \code{cowplot} object with the Cox-PH table annotation added. } \description{ @@ -72,12 +71,12 @@ The annotations are added using the \code{cowplot} package for flexible placemen } \section{Functions}{ \itemize{ -\item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a median survival time summary table as an -annotation box. - \item \code{annotate_risk()}: The function \code{annotate_risk} adds a "Numbers at Risk" table below a Kaplan-Meier plot (\code{\link[=gg_km]{gg_km()}}) using \code{cowplot::plot_grid}. +\item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a median survival time summary table as an +annotation box. + \item \code{annotate_coxph()}: The function \code{annotate_coxph()} adds a Cox Proportional Hazards summary table created by the function \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} as an annotation box. @@ -95,9 +94,6 @@ surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) -# Annotate Kaplan-Meier Plot with Median Survival Table -annotate_surv_med(plt_kmg01, fit_kmg01) - # Annotate Plot with Numbers at Risk Table annotate_risk(plt_kmg01, fit_kmg01) @@ -107,6 +103,9 @@ use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) fit_kmg01 <- survival::survfit(formula, use_lung2) annotate_risk(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order +# Annotate Kaplan-Meier Plot with Median Survival Table +annotate_surv_med(plt_kmg01, fit_kmg01) + # Annotate Kaplan-Meier Plot with Cox-PH Table coxph_tbl <- get_cox_pairwise_df(formula, data = use_lung, arm = "arm", ref_group = "A") annotate_coxph(plt_kmg01, coxph_tbl) From 1c66e79e5194348298ea542fbf59f4a7445afa26 Mon Sep 17 00:00:00 2001 From: melkiades Date: Thu, 11 Dec 2025 16:06:35 +0100 Subject: [PATCH 46/51] change name --- NAMESPACE | 2 +- R/annotate_gg_km.R | 10 +++++----- R/gg_km.R | 2 +- man/annotate_gg_km.Rd | 12 ++++++------ man/gg_km.Rd | 2 +- tests/testthat/test-gg_km.R | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fbe98ee9..63f10b27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,7 @@ export(add_difference_row) export(add_hierarchical_count_row) export(add_overall) export(annotate_coxph) -export(annotate_risk) +export(annotate_riskdf) export(annotate_surv_med) export(filter_hierarchical) export(get_cox_pairwise_df) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 654179ae..38beb12c 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -44,7 +44,7 @@ NULL -#' @describeIn annotate_gg_km The function `annotate_risk` adds a "Numbers at Risk" table below a +#' @describeIn annotate_gg_km The function `annotate_riskdf` adds a "Numbers at Risk" table below a #' Kaplan-Meier plot ([gg_km()]) using `cowplot::plot_grid`. #' #' @param fit_km (`survfit`)\cr @@ -60,21 +60,21 @@ NULL #' @param xlab (`character`)\cr #' A single character string for the **x-axis label** on the 'at-risk' table. This typically represents #' time (e.g., "Time (Days)"). -#' @return The function `annotate_risk` returns a `cowplot` object combining the KM plot and the 'Numbers at Risk' +#' @return The function `annotate_riskdf` returns a `cowplot` object combining the KM plot and the 'Numbers at Risk' #' table. #' #' @examples #' # Annotate Plot with Numbers at Risk Table -#' annotate_risk(plt_kmg01, fit_kmg01) +#' annotate_riskdf(plt_kmg01, fit_kmg01) #' #' # Change order of y-axis (arm) #' use_lung2 <- use_lung #' use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) #' fit_kmg01 <- survival::survfit(formula, use_lung2) -#' annotate_risk(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order +#' annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order #' #' @export -annotate_risk <- function(gg_plt, fit_km, title = "Patients at Risk:", +annotate_riskdf <- function(gg_plt, fit_km, title = "Patients at Risk:", rel_height_plot = 0.75, xlab = "Days", ...) { check_class(gg_plt, c("gg", "ggplot", "cowplot")) diff --git a/R/gg_km.R b/R/gg_km.R index ee706c4e..fbba18ae 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -4,7 +4,7 @@ #' This set of functions facilitates the creation of Kaplan-Meier survival plots using `ggplot2`. Use #' `process_survfit()` to prepare the survival data from a fitted `survfit` object, and then #' `gg_km()` to generate the Kaplan-Meier plot with various customization options. Additional functions -#' like `annot_surv_med()`, `annot_cox_ph()`, and `annotate_risk()` allow for adding summary tables and +#' like `annot_surv_med()`, `annot_cox_ph()`, and `annotate_riskdf()` allow for adding summary tables and #' annotations to the plot. #' #' @name gg_km diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 546863f6..f25d6668 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/annotate_gg_km.R \name{annotate_gg_km} \alias{annotate_gg_km} -\alias{annotate_risk} +\alias{annotate_riskdf} \alias{annotate_surv_med} \alias{annotate_coxph} \title{Annotate Kaplan-Meier Plot} \usage{ -annotate_risk( +annotate_riskdf( gg_plt, fit_km, title = "Patients at Risk:", @@ -56,7 +56,7 @@ A data frame containing the pre-calculated Cox-PH results, typically from a func This data is used to generate the annotation table content.} } \value{ -The function \code{annotate_risk} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' +The function \code{annotate_riskdf} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. The function \code{annotate_surv_med} returns a \code{cowplot} object with the median survival table annotation @@ -71,7 +71,7 @@ The annotations are added using the \code{cowplot} package for flexible placemen } \section{Functions}{ \itemize{ -\item \code{annotate_risk()}: The function \code{annotate_risk} adds a "Numbers at Risk" table below a +\item \code{annotate_riskdf()}: The function \code{annotate_riskdf} adds a "Numbers at Risk" table below a Kaplan-Meier plot (\code{\link[=gg_km]{gg_km()}}) using \code{cowplot::plot_grid}. \item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a median survival time summary table as an @@ -95,13 +95,13 @@ surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) # Annotate Plot with Numbers at Risk Table -annotate_risk(plt_kmg01, fit_kmg01) +annotate_riskdf(plt_kmg01, fit_kmg01) # Change order of y-axis (arm) use_lung2 <- use_lung use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) fit_kmg01 <- survival::survfit(formula, use_lung2) -annotate_risk(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order +annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order # Annotate Kaplan-Meier Plot with Median Survival Table annotate_surv_med(plt_kmg01, fit_kmg01) diff --git a/man/gg_km.Rd b/man/gg_km.Rd index 0997c5da..7063b19a 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -79,7 +79,7 @@ The function \code{gg_km} returns a \code{ggplot2} object of the KM plot. This set of functions facilitates the creation of Kaplan-Meier survival plots using \code{ggplot2}. Use \code{process_survfit()} to prepare the survival data from a fitted \code{survfit} object, and then \code{gg_km()} to generate the Kaplan-Meier plot with various customization options. Additional functions -like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annotate_risk()} allow for adding summary tables and +like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annotate_riskdf()} allow for adding summary tables and annotations to the plot. } \details{ diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index 083f3dcd..77f3655b 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -33,6 +33,6 @@ test_that("gg_km() works with default inputs", { plt_kmg01 <- gg_km(surv_plot_data) |> annotate_surv_med(fit_kmg01) |> annotate_coxph(coxph_tbl) |> - annotate_risk(fit_kmg01) + annotate_riskdf(fit_kmg01) ) }) From 74c794f45276c58d353461d60f8d5d706c6fbe8b Mon Sep 17 00:00:00 2001 From: melkiades Date: Tue, 16 Dec 2025 17:01:13 +0100 Subject: [PATCH 47/51] fixes --- R/gg_km.R | 5 +-- R/gg_km_utils.R | 87 +++++++++++++++++++++++++++++++++++-------------- man/df2gg.Rd | 19 +++++++++++ man/gg_km.Rd | 4 --- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/R/gg_km.R b/R/gg_km.R index fbba18ae..e1326b79 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -118,8 +118,6 @@ process_survfit <- function(fit_km, #' A single numeric value specifying the **line width** for the survival curves. #' @param censor_show (`logical`)\cr #' A single logical value indicating whether to display **censoring marks** on the plot. Defaults to `TRUE`. -#' @param pch (`numeric`)\cr -#' A single numeric value specifying the **plotting character** (point shape code) for censoring marks. #' @param size (`numeric`)\cr #' A single numeric value specifying the **size** of the censoring marks. #' @param max_time (`numeric`)\cr @@ -161,7 +159,6 @@ gg_km <- function(surv_plot_data, lty = NULL, lwd = 0.5, censor_show = TRUE, - pch = 3, size = 2, max_time = NULL, xticks = NULL, @@ -295,7 +292,7 @@ gg_km <- function(surv_plot_data, size = size, na.rm = TRUE ) + - ggplot2::scale_shape_manual(name = NULL, values = pch) + + ggplot2::scale_shape_manual(values = 3) + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA))) } diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index eaf013d5..f90679d4 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -13,9 +13,28 @@ #' #' @return A \code{ggplot2} object representing the table. #' +#' @examples +#' # Example using proper x-axis +#' df <- as.data.frame(matrix(c( +#' # 0, 250, 500, 750, 1000 <-- (Reference) +#' 54, 28, 10, 3, 0, +#' 59, 35, 16, 5, 1, +#' 54, 25, 4, 0, 0 +#' ), nrow = 3, byrow = TRUE)) +#' +#' # Set names manually +#' colnames(df) <- c("0", "250", "500", "750", "1000") +#' rownames(df) <- c("A", "B", "C") +#' +#' df2gg(df, font_size = 8, add_proper_xaxis = TRUE) +#' +#' # Example without proper x-axis +#' df2gg(df, font_size = 8, add_proper_xaxis = FALSE, hline = FALSE) +#' #' @keywords internal df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL, add_proper_xaxis = FALSE) { + # Convert all values to character, replacing NAs with "NA" df <- as.data.frame(apply(df, 1:2, function(x) { if (is.na(x)) { "NA" @@ -23,28 +42,37 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, as.character(x) } })) + + # Add column labels as first row if specified if (col_labels) { df <- as.matrix(df) df <- rbind(colnames(df), df) } - if (is.null(colwidths)) { - colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) - } - tot_width <- sum(colwidths) + + # Create ggplot2 object with x-axis specified in df if (add_proper_xaxis) { + # Determine column widths if not provided + if (is.null(colwidths)) { + tot_width <- max(colnames(df) |> as.numeric(), na.rm = TRUE) + colwidths <- rep(floor(tot_width / ncol(df)), ncol(df)) + } else { + tot_width <- sum(colwidths) + } + df_long <- df |> + as.data.frame() |> # 1. Ensure the row names ('A', 'B', 'C') are a column named 'row_name' - mutate(row_name = row.names(df)) |> + dplyr::mutate(row_name = row.names(df)) |> # 2. Pivot the remaining columns (starting from '0' to the end) longer tidyr::pivot_longer( - cols = -row_name, # Select all columns EXCEPT 'row_name' + cols = -.data$row_name, # Select all columns EXCEPT 'row_name' names_to = "col_name", # Name the new column containing the old column headers values_to = "value" # Name the new column containing the data values ) |> - dplyr::arrange(row_name, col_name) |> - mutate( - col_name = as.numeric(col_name), - row_name = factor(row_name, levels = row.names(df)) + dplyr::arrange(.data$row_name, .data$col_name) |> + dplyr::mutate( + col_name = as.numeric(.data$col_name), + row_name = factor(.data$row_name, levels = row.names(df)) ) res <- ggplot2::ggplot(data = df_long) + ggplot2::theme_void() + @@ -52,24 +80,21 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, x = df_long$col_name, y = rev(df_long$row_name), # why rev? label = df_long$value, size = font_size / .pt ) + + # Create ggplot2 object with a specific x-axis based on column widths } else { + # Determine column widths if not provided + if (is.null(colwidths)) { + colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) + } + tot_width <- sum(colwidths) + res <- ggplot2::ggplot(data = df) + ggplot2::theme_void() + - ggplot2::scale_x_continuous(limits = c( - 0, - tot_width - )) + + ggplot2::scale_x_continuous(limits = c(0, tot_width)) + ggplot2::scale_y_continuous(limits = c(1, nrow(df))) - if (!is.null(bg_fill)) { - res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) - } - if (hline) { - res <- res + ggplot2::annotate("segment", - x = 0 + 0.2 * colwidths[2], - xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - - 0.5, yend = nrow(df) - 0.5 - ) - } + + for (i in seq_len(ncol(df))) { line_pos <- c( if (i == 1) { @@ -90,6 +115,20 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, } } + # Add horizontal line if specified + if (hline) { + res <- res + ggplot2::annotate( + "segment", + x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), + y = nrow(df) - 0.5, yend = nrow(df) - 0.5 + ) + } + + # Set background fill if specified + if (!is.null(bg_fill)) { + res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) + } + res } diff --git a/man/df2gg.Rd b/man/df2gg.Rd index ace83168..7e4d74f5 100644 --- a/man/df2gg.Rd +++ b/man/df2gg.Rd @@ -37,5 +37,24 @@ A \code{ggplot2} object representing the table. } \description{ Creates a \code{ggplot2} object that renders a data frame as a table graphic. +} +\examples{ +# Example using proper x-axis +df <- as.data.frame(matrix(c( + # 0, 250, 500, 750, 1000 <-- (Reference) + 54, 28, 10, 3, 0, + 59, 35, 16, 5, 1, + 54, 25, 4, 0, 0 +), nrow = 3, byrow = TRUE)) + +# Set names manually +colnames(df) <- c("0", "250", "500", "750", "1000") +rownames(df) <- c("A", "B", "C") + +df2gg(df, font_size = 8, add_proper_xaxis = TRUE) + +# Example without proper x-axis +df2gg(df, font_size = 8, add_proper_xaxis = FALSE, hline = FALSE) + } \keyword{internal} diff --git a/man/gg_km.Rd b/man/gg_km.Rd index 7063b19a..1583f690 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -12,7 +12,6 @@ gg_km( lty = NULL, lwd = 0.5, censor_show = TRUE, - pch = 3, size = 2, max_time = NULL, xticks = NULL, @@ -46,9 +45,6 @@ A single numeric value specifying the \strong{line width} for the survival curve \item{censor_show}{(\code{logical})\cr A single logical value indicating whether to display \strong{censoring marks} on the plot. Defaults to \code{TRUE}.} -\item{pch}{(\code{numeric})\cr -A single numeric value specifying the \strong{plotting character} (point shape code) for censoring marks.} - \item{size}{(\code{numeric})\cr A single numeric value specifying the \strong{size} of the censoring marks.} From 2282e026efb7e33892275412be0694e044618a5b Mon Sep 17 00:00:00 2001 From: melkiades Date: Tue, 16 Dec 2025 17:13:36 +0100 Subject: [PATCH 48/51] small check --- R/gg_km.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/gg_km.R b/R/gg_km.R index e1326b79..8e4f50e3 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -186,6 +186,7 @@ gg_km <- function(surv_plot_data, ) } check_numeric(ylim, allow_empty = TRUE) + check_scalar_logical(censor_show) data <- surv_plot_data strata_levels <- levels(data$strata) @@ -285,7 +286,7 @@ gg_km <- function(surv_plot_data, ggplot2::scale_linetype_manual(values = lty) } - if (censor_show) { + if (isTRUE(censor_show)) { gg_plt <- gg_plt + ggplot2::geom_point( data = data[data$n.censor != 0, ], ggplot2::aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), From e5768e03a59dece4288bf008380edb46535699c2 Mon Sep 17 00:00:00 2001 From: melkiades Date: Wed, 17 Dec 2025 15:36:29 +0100 Subject: [PATCH 49/51] fix docs --- R/annotate_gg_km.R | 4 ++-- R/gg_km_utils.R | 20 +------------------- man/df2gg.Rd | 19 ------------------- tests/testthat/test-gg_km.R | 24 ++++++++++++++++++++++++ 4 files changed, 27 insertions(+), 40 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 38beb12c..532bc90b 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -75,8 +75,8 @@ NULL #' #' @export annotate_riskdf <- function(gg_plt, fit_km, title = "Patients at Risk:", - rel_height_plot = 0.75, xlab = "Days", - ...) { + rel_height_plot = 0.75, xlab = "Days", + ...) { check_class(gg_plt, c("gg", "ggplot", "cowplot")) check_class(fit_km, "survfit") check_string(title, allow_empty = TRUE) diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R index f90679d4..d6eb431f 100644 --- a/R/gg_km_utils.R +++ b/R/gg_km_utils.R @@ -13,24 +13,6 @@ #' #' @return A \code{ggplot2} object representing the table. #' -#' @examples -#' # Example using proper x-axis -#' df <- as.data.frame(matrix(c( -#' # 0, 250, 500, 750, 1000 <-- (Reference) -#' 54, 28, 10, 3, 0, -#' 59, 35, 16, 5, 1, -#' 54, 25, 4, 0, 0 -#' ), nrow = 3, byrow = TRUE)) -#' -#' # Set names manually -#' colnames(df) <- c("0", "250", "500", "750", "1000") -#' rownames(df) <- c("A", "B", "C") -#' -#' df2gg(df, font_size = 8, add_proper_xaxis = TRUE) -#' -#' # Example without proper x-axis -#' df2gg(df, font_size = 8, add_proper_xaxis = FALSE, hline = FALSE) -#' #' @keywords internal df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL, add_proper_xaxis = FALSE) { @@ -81,7 +63,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, label = df_long$value, size = font_size / .pt ) - # Create ggplot2 object with a specific x-axis based on column widths + # Create ggplot2 object with a specific x-axis based on column widths } else { # Determine column widths if not provided if (is.null(colwidths)) { diff --git a/man/df2gg.Rd b/man/df2gg.Rd index 7e4d74f5..ace83168 100644 --- a/man/df2gg.Rd +++ b/man/df2gg.Rd @@ -37,24 +37,5 @@ A \code{ggplot2} object representing the table. } \description{ Creates a \code{ggplot2} object that renders a data frame as a table graphic. -} -\examples{ -# Example using proper x-axis -df <- as.data.frame(matrix(c( - # 0, 250, 500, 750, 1000 <-- (Reference) - 54, 28, 10, 3, 0, - 59, 35, 16, 5, 1, - 54, 25, 4, 0, 0 -), nrow = 3, byrow = TRUE)) - -# Set names manually -colnames(df) <- c("0", "250", "500", "750", "1000") -rownames(df) <- c("A", "B", "C") - -df2gg(df, font_size = 8, add_proper_xaxis = TRUE) - -# Example without proper x-axis -df2gg(df, font_size = 8, add_proper_xaxis = FALSE, hline = FALSE) - } \keyword{internal} diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index 77f3655b..9b8e392e 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -36,3 +36,27 @@ test_that("gg_km() works with default inputs", { annotate_riskdf(fit_kmg01) ) }) + +test_that("df2gg() works with proper x-axis and without", { + # Example using proper x-axis + df <- as.data.frame(matrix(c( + # 0, 250, 500, 750, 1000 <-- (Reference) + 54, 28, 10, 3, 0, + 59, 35, 16, 5, 1, + 54, 25, 4, 0, 0 + ), nrow = 3, byrow = TRUE)) + + # Set names manually + colnames(df) <- c("0", "250", "500", "750", "1000") + rownames(df) <- c("A", "B", "C") + + # Example with proper x-axis + expect_no_error( + null <- df2gg(df, font_size = 8, add_proper_xaxis = TRUE) + ) + + # Example without proper x-axis + expect_no_error( + null <- df2gg(df, font_size = 8, add_proper_xaxis = FALSE, hline = FALSE) + ) +}) From 35c423f04de80923dd4b68b6d888a3ec071517fb Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Thu, 18 Dec 2025 11:25:50 +0800 Subject: [PATCH 50/51] adding column name --- R/get_cox_pairwise_df.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 1a1f921e..67b4a26c 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -114,6 +114,8 @@ get_cox_pairwise_df <- function(model_formula, data, arm, ref_group = NULL) { rownames(current_row) <- current_arm ret <- rbind(ret, current_row) } - + names(ret) <- c("HR", + "95% CI", + "p-value (log-rank)") ret } From 899f0b96a62ec79071514f837ca4efdc853de33c Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Thu, 18 Dec 2025 11:32:16 +0800 Subject: [PATCH 51/51] fix style --- R/get_cox_pairwise_df.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 67b4a26c..61518813 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -114,8 +114,10 @@ get_cox_pairwise_df <- function(model_formula, data, arm, ref_group = NULL) { rownames(current_row) <- current_arm ret <- rbind(ret, current_row) } - names(ret) <- c("HR", - "95% CI", - "p-value (log-rank)") + names(ret) <- c( + "HR", + "95% CI", + "p-value (log-rank)" + ) ret }