diff --git a/.Rbuildignore b/.Rbuildignore index ae7e12a9..4deffee4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,4 @@ ^vignettes/articles$ ^\.gitlab-ci\.yml$ LICENSE +.lintr diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..c514f393 --- /dev/null +++ b/.lintr @@ -0,0 +1,6 @@ +linters: linters_with_defaults( + line_length_linter = line_length_linter(120), + cyclocomp_linter = NULL, + object_usage_linter = NULL, + object_length_linter = NULL + ) diff --git a/DESCRIPTION b/DESCRIPTION index d020fde5..044e9802 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,15 +22,18 @@ BugReports: https://github.com/insightsengineering/crane/issues Depends: gtsummary (>= 2.5.0), R (>= 4.2) -Imports: +Imports: broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), + cowplot (>= 1.2.0), cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), + ggplot2 (>= 4.0.0), glue (>= 1.8.0), gt (>= 0.11.1), + labeling, lifecycle, rlang (>= 1.1.5), survival (>= 3.6-4), diff --git a/NAMESPACE b/NAMESPACE index 0ff9110d..63f10b27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,13 +12,19 @@ export(add_blank_rows) export(add_difference_row) export(add_hierarchical_count_row) export(add_overall) +export(annotate_coxph) +export(annotate_riskdf) +export(annotate_surv_med) export(filter_hierarchical) +export(get_cox_pairwise_df) +export(gg_km) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) export(label_roche_ratio) export(modify_header_rm_md) export(modify_zero_recode) +export(process_survfit) export(remove_duplicate_keys) export(sort_hierarchical) export(style_roche_number) @@ -36,8 +42,13 @@ export(tbl_shift) export(tbl_survfit_quantiles) export(tbl_survfit_times) export(theme_gtsummary_roche) +import(ggplot2) import(glue) import(rlang) +importFrom(broom,tidy) +importFrom(cowplot,draw_plot) +importFrom(cowplot,ggdraw) +importFrom(cowplot,plot_grid) importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,all_of) @@ -54,3 +65,10 @@ importFrom(gtsummary,add_difference_row) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(labeling,extended) +importFrom(stats,pchisq) +importFrom(survival,Surv) +importFrom(survival,coxph) +importFrom(survival,survdiff) +importFrom(tidyr,pivot_wider) +importFrom(utils,tail) diff --git a/NEWS.md b/NEWS.md index ab50d9b8..b622cd6d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,8 @@ * Added functions `style_roche_number()` and `label_roche_number()` to replace their respective gtsummary versions, with additional `inf` argument for customization of `Inf`/`-Inf`/`NaN` values. +* Added `gg_km()` function for creating Kaplan-Meier plots. + * Added `list("assign_summary_type-arg:cat_threshold" = 0L)` to `theme_gtsummary_roche()`. Numeric variables with few levels will no longer default to summary type `'categorical'` in `gtsummary::tbl_summary()` and `tbl_roche_summary()`. (#79) ### Other Updates diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R new file mode 100644 index 00000000..532bc90b --- /dev/null +++ b/R/annotate_gg_km.R @@ -0,0 +1,284 @@ +#' Annotate Kaplan-Meier Plot +#' +#' @description +#' These functions provide capabilities to annotate Kaplan-Meier plots ([gg_km()]) with additional summary tables, +#' including median survival times, numbers at risk, and cox proportional hazards results. +#' The annotations are added using the `cowplot` package for flexible placement. +#' +#' @param gg_plt (`ggplot2` or `cowplot`)\cr +#' The primary plot object (either a `ggplot2` or `cowplot` object) of the Kaplan-Meier plot. +#' @param ... Additional arguments passed to the control list for the annotation box. +#' These arguments override the default values. +#' Accepted arguments include: +#' \itemize{ +#' \item \code{x} (\code{numeric}): X-coordinate for the box anchor position (0 to 1). Default is +#' \code{0.8} (\code{0.29} for `annotate_coxph`). +#' \item \code{y} (\code{numeric}): Y-coordinate for the box anchor position (0 to 1). Default is +#' \code{0.85} (\code{0.51} for `annotate_coxph`). +#' \item \code{w} (\code{numeric}): Width of the annotation box (0 to 1). Default is +#' \code{0.32} (\code{0.4} for `annotate_coxph`). +#' \item \code{h} (\code{numeric}): Height of the annotation box (0 to 1). Default +#' is \code{0.16} (\code{0.125} for `annotate_coxph`). +#' \item \code{fill} (\code{logical}): Whether the annotation box should have a background fill. Default is +#' \code{TRUE}. +#' \item \code{font_size} (\code{numeric}): Base font size for the text inside the annotation box. Default +#' is \code{10}. +#' } +#' +#' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for related functionalities. +#' +#' @examples +#' # Preparing the Kaplan-Meier Plot +#' use_lung <- survival::lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) +#' +#' formula <- survival::Surv(time, status) ~ arm +#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' surv_plot_data <- process_survfit(fit_kmg01) +#' +#' plt_kmg01 <- gg_km(surv_plot_data) +#' +#' @name annotate_gg_km +NULL + + +#' @describeIn annotate_gg_km The function `annotate_riskdf` adds a "Numbers at Risk" table below a +#' Kaplan-Meier plot ([gg_km()]) using `cowplot::plot_grid`. +#' +#' @param fit_km (`survfit`)\cr +#' A fitted Kaplan-Meier object of class `survfit` (from the `survival` package). This object contains the necessary +#' survival data used to calculate and generate the content displayed in the annotation table. +#' @param title (`string`)\cr +#' A single logical value indicating whether to include a above the table. Defaults to +#' `""Patients at Risk:""`. If `NULL`, no title is added. +#' @param rel_height_plot (`numeric`)\cr +#' A single numeric value defining the **relative height** of the main Kaplan-Meier plot area compared +#' to the 'at-risk' table. This value should be between 0 and 1, where a value closer to 1 gives the main plot +#' more vertical space. Defaults to `0.75`. +#' @param xlab (`character`)\cr +#' A single character string for the **x-axis label** on the 'at-risk' table. This typically represents +#' time (e.g., "Time (Days)"). +#' @return The function `annotate_riskdf` returns a `cowplot` object combining the KM plot and the 'Numbers at Risk' +#' table. +#' +#' @examples +#' # Annotate Plot with Numbers at Risk Table +#' annotate_riskdf(plt_kmg01, fit_kmg01) +#' +#' # Change order of y-axis (arm) +#' use_lung2 <- use_lung +#' use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) +#' fit_kmg01 <- survival::survfit(formula, use_lung2) +#' annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order +#' +#' @export +annotate_riskdf <- function(gg_plt, fit_km, title = "Patients at Risk:", + rel_height_plot = 0.75, xlab = "Days", + ...) { + check_class(gg_plt, c("gg", "ggplot", "cowplot")) + check_class(fit_km, "survfit") + check_string(title, allow_empty = TRUE) + check_scalar(rel_height_plot) + check_numeric(rel_height_plot) + if (rel_height_plot <= 0 || rel_height_plot >= 1) { + cli::cli_abort( + "{.arg rel_height_plot} must be a single {.cls numeric} value between 0 and 1 (exclusive).", + call = get_cli_abort_call() + ) + } + check_string(xlab) + default_eargs <- list( + font_size = 10 + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + font_size <- eargs[["font_size"]] + check_numeric(font_size) + + data <- broom::tidy(fit_km) + xticks <- h_xticks(data = data) + annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) + + # Placeholder for strata_levels, should be retrieved from fit_km or passed as argument + strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) + + annot_tbl <- if (is.null(fit_km$strata)) { + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = strata_levels + ) + } else { + strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") + levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = annot_tbl$strata + ) + } + + at_risk_tbl <- as.data.frame( + tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1] + ) + at_risk_tbl[is.na(at_risk_tbl)] <- 0 + rownames(at_risk_tbl) <- levels(annot_tbl$strata) + + gg_at_risk <- df2gg( + at_risk_tbl, + font_size = eargs$font_size, col_labels = FALSE, hline = FALSE, + colwidths = rep(1, ncol(at_risk_tbl)), + add_proper_xaxis = TRUE + ) + + ggplot2::labs(title = if (!is.null(title)) title else NULL, x = xlab) + + ggplot2::theme_bw(base_size = eargs$font_size) + + ggplot2::theme( + plot.title = ggplot2::element_text(size = eargs$font_size, vjust = 3, face = "bold"), + panel.border = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), + axis.title.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + axis.text.y = ggplot2::element_text(size = eargs$font_size, face = "italic", hjust = 1), + axis.text.x = ggplot2::element_text(size = eargs$font_size), + axis.line.x = ggplot2::element_line() + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) + + gg_plt <- cowplot::plot_grid( + gg_plt, gg_at_risk, + align = "vh", axis = "b", ncol = 1, + rel_heights = c(rel_height_plot, 1 - rel_height_plot) + ) + gg_plt +} + +#' @describeIn annotate_gg_km The `annotate_surv_med` function adds a median survival time summary table as an +#' annotation box. +#' +#' @return The function `annotate_surv_med` returns a `cowplot` object with the median survival table annotation +#' added, ready for final display or saving. +#' +#' @examples +#' # Annotate Kaplan-Meier Plot with Median Survival Table +#' annotate_surv_med(plt_kmg01, fit_kmg01) +#' +#' @export +annotate_surv_med <- function(gg_plt, fit_km, ...) { + set_cli_abort_call() + default_eargs <- list( + x = 0.8, + y = 0.85, + w = 0.32, + h = 0.16, + font_size = 10, + fill = TRUE + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + + # Checks + check_class(fit_km, "survfit") + check_class(gg_plt, c("gg", "ggplot", "cowplot")) + + # Check position/size (x, y, w, h, font_size) - Must be single non-missing numeric + for (arg_name in c("x", "y", "w", "h", "font_size")) { + check_numeric(eargs[[arg_name]]) + } + check_logical(eargs[["fill"]]) + + # Determine strata_levels for h_tbl_median_surv, assuming it's available in the calling environment or logic should + # be updated. For now, keeping as is, but this typically requires strata_levels or inferring it from fit_km + strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for strata_levels + + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, strata_levels = strata_levels) + bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = eargs[["font_size"]], colwidths = c(1, 1, 2), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = eargs[["font_size"]], face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, eargs[["x"]], eargs[["y"]], + width = eargs[["w"]], height = eargs[["h"]], + vjust = 0.5, hjust = 0.5 + ) + gg_plt +} + +#' @describeIn annotate_gg_km The function `annotate_coxph()` adds a Cox Proportional Hazards summary table created by +#' the function [get_cox_pairwise_df()] as an annotation box. +#' +#' @param coxph_tbl (`data.frame`)\cr +#' A data frame containing the pre-calculated Cox-PH results, typically from a function like `get_cox_pairwise_df`. +#' This data is used to generate the annotation table content. +#' @param ... Additional arguments passed to the control list for the annotation box. +#' These arguments override the default values. +#' Accepted arguments include: +#' \itemize{ +#' \item \code{x} (`numeric`): X-coordinate for the box anchor position (0 to 1). Default is \code{0.29}. +#' \item \code{y} (`numeric`): Y-coordinate for the box anchor position (0 to 1). Default is \code{0.51}. +#' \item \code{w} (`numeric`): Width of the annotation box (0 to 1). Default is \code{0.4}. +#' \item \code{h} (`numeric`): Height of the annotation box (0 to 1). Default is \code{0.125}. +#' } +#' +#' @return The function `annotate_coxph` returns a `cowplot` object with the Cox-PH table annotation added. +#' +#' @examples +#' # Annotate Kaplan-Meier Plot with Cox-PH Table +#' coxph_tbl <- get_cox_pairwise_df(formula, data = use_lung, arm = "arm", ref_group = "A") +#' annotate_coxph(plt_kmg01, coxph_tbl) +#' +#' @export +annotate_coxph <- function(gg_plt, coxph_tbl, ...) { + set_cli_abort_call() + default_eargs <- list( + x = 0.29, + y = 0.51, + w = 0.4, + h = 0.125, + fill = TRUE, + font_size = 10 + ) + eargs <- list(...) + eargs <- utils::modifyList(default_eargs, eargs) + + # Check position/size (x, y, w, h, font_size) - Must be single non-missing numeric + for (arg_name in c("x", "y", "w", "h", "font_size")) { + check_numeric(eargs[[arg_name]]) + } + check_logical(eargs[["fill"]]) + + bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] + + gg_coxph <- df2gg(coxph_tbl, font_size = eargs$font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = eargs$font_size, face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) + gg_coxph <- suppressMessages( + gg_coxph + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_coxph, eargs[["x"]], eargs[["y"]], + width = eargs[["w"]], height = eargs[["h"]], + vjust = 0.5, hjust = 0.5 + ) + + gg_plt +} diff --git a/R/crane-package.R b/R/crane-package.R index 9d4c0225..cea5e9fa 100644 --- a/R/crane-package.R +++ b/R/crane-package.R @@ -1,8 +1,16 @@ #' @keywords internal #' @import rlang +#' @import ggplot2 #' @import glue glue +#' @importFrom broom tidy +#' @importFrom cowplot plot_grid ggdraw draw_plot #' @importFrom dplyr across starts_with ends_with contains matches num_range #' all_of any_of everything last_col where +#' @importFrom labeling extended +#' @importFrom survival coxph Surv survdiff +#' @importFrom stats pchisq +#' @importFrom tidyr pivot_wider +#' @importFrom utils tail "_PACKAGE" ## usethis namespace: start diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R new file mode 100644 index 00000000..61518813 --- /dev/null +++ b/R/get_cox_pairwise_df.R @@ -0,0 +1,123 @@ +#' Generate Table of Pairwise Cox-PH and Log-Rank Results +#' +#' @description +#' This function performs pairwise comparisons of treatment arms using the **Cox Proportional Hazards model** and +#' calculates the corresponding **log-rank p-value**. Each comparison tests a non-reference group against a specified +#' reference group. +#' +#' @param model_formula (`formula`)\cr +#' A `formula` object specifying the survival model, typically in the form `Surv(time, status) ~ arm + covariates`. +#' @param data (`data.frame`)\cr +#' A `data.frame` containing the survival data, including time, status, and the arm variable. +#' @param arm (`character`)\cr +#' A single character string specifying the name of the column in `data` that contains the grouping/treatment +#' **arm variable**. This column **must be a factor** for correct stratification and comparison. +#' @param ref_group (`character` or `NULL`)\cr +#' A single character string specifying the level of the `arm` variable to be used as the **reference group** for +#' all pairwise comparisons. If `NULL` (the default), the **first unique level** of the `arm` column is automatically +#' selected as the reference group. +#' +#' @return A `data.frame` with the results of the pairwise comparisons. The columns include: +#' \itemize{ +#' \item `arm`: (rownames of the `data.frame`) The comparison arm (group) being tested against the reference group. +#' \item `hr`: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +#' \item `ci`: The 95% confidence interval for the HR, presented as a string in the format "(lower, upper)", with +#' values formatted to two decimal places. +#' \item `pval`: The log-rank p-value for the comparison. +#' } +#' +#' @details The function iterates through each unique arm (excluding the reference group). For each iteration, it +#' filters the data to include only the current comparison arm and the reference arm, and then: +#' \itemize{ +#' \item Fits a Cox model using `survival::coxph`. +#' \item Performs a log-rank test using `survival::survdiff`. +#' } +#' The Hazard Ratio and its 95% confidence interval are extracted from the Cox model summary, and the p-value is +#' extracted from the log-rank test. +#' +#' @seealso `annotate_gg_km()`, `gg_km()`, and the `survival` package functions `survival::coxph` and +#' `survival::survdiff`. +#' +#' @examples +#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +#' # and 'arm' is the treatment group) +#' library(dplyr) # For better data handling +#' +#' # Prepare data in a modern dplyr-friendly way +#' surv_data <- survival::lung |> +#' mutate( +#' arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), +#' status = status - 1 # Convert status to 0/1 +#' ) |> +#' filter(if_all(everything(), ~ !is.na(.))) +#' +#' formula <- survival::Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_df( +#' model_formula = formula, +#' data = surv_data, +#' arm = "arm", +#' ref_group = "A" +#' ) +#' print(results_tbl) +#' +#' @export +get_cox_pairwise_df <- function(model_formula, data, arm, ref_group = NULL) { + set_cli_abort_call() + # Input checks + if (!rlang::is_formula(model_formula)) { + cli::cli_abort( + "{.arg model_formula} must be a {.cls formula}.", + call = get_cli_abort_call() + ) + } + if (!is.factor(data[[arm]])) { + cli::cli_abort( + "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", + call = get_cli_abort_call() + ) + } + + # Determine reference and comparison groups + ref_group <- if (!is.null(ref_group)) { + ref_group + } else { + levels(data[[arm]])[1] + } + comp_group <- setdiff(levels(data[[arm]]), ref_group) + + ret <- c() + for (current_arm in comp_group) { + subset_arm <- c(ref_group, current_arm) + if (length(subset_arm) != 2) { + cli::cli_abort( + "{.arg subset_arm} must contain exactly 2 arms/groups (current length is {length(subset_arm)}).", + call = get_cli_abort_call() + ) + } + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] + suppressWarnings( + coxph_ans <- coxph(formula = model_formula, data = comp_df) |> summary() + ) + orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - 1) + current_row <- data.frame( + hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), + ci = paste0( + "(", + sprintf("%.2f", coxph_ans$conf.int[1, 3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1, 4]), + ")" + ), + pval = log_rank_pvalue + ) + rownames(current_row) <- current_arm + ret <- rbind(ret, current_row) + } + names(ret) <- c( + "HR", + "95% CI", + "p-value (log-rank)" + ) + ret +} diff --git a/R/gg_km.R b/R/gg_km.R new file mode 100644 index 00000000..8e4f50e3 --- /dev/null +++ b/R/gg_km.R @@ -0,0 +1,301 @@ +#' Kaplan-Meier Plot +#' +#' @description +#' This set of functions facilitates the creation of Kaplan-Meier survival plots using `ggplot2`. Use +#' `process_survfit()` to prepare the survival data from a fitted `survfit` object, and then +#' `gg_km()` to generate the Kaplan-Meier plot with various customization options. Additional functions +#' like `annot_surv_med()`, `annot_cox_ph()`, and `annotate_riskdf()` allow for adding summary tables and +#' annotations to the plot. +#' +#' @name gg_km +NULL + +#' @describeIn gg_km takes a fitted [survfit] object and processes it into a data frame +#' suitable for plotting a Kaplan-Meier curve with `ggplot2`. Time zero is also added to the data. +#' +#' @param fit_km A fitted Kaplan-Meier object of class `survfit`. +#' @param strata_levels (`string`)\cr +#' A single character string used as the strata level if the input `fit_km` object +#' has no strata (e.g., `"All"`). +#' @param max_time (`numeric` or `NULL`)\cr +#' A single numeric value defining the **maximum time point** to include in the data, +#' or `NULL` for no time limit. +#' +#' @return The function `process_survfit` returns a data frame containing the survival +#' curve steps, confidence intervals, and censoring info. +#' +#' @details +#' Data setup assumes `"time"` is event time, `"status"` is event indicator (`1` represents an event), +#' while `"arm"` is the treatment group. +#' +#' @examples +#' # Data preparation for KM plot +#' use_lung <- survival::lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) +#' +#' # Fit Kaplan-Meier model +#' formula <- survival::Surv(time, status) ~ arm +#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' +#' # Process survfit data for plotting +#' surv_plot_data <- process_survfit(fit_kmg01) +#' head(surv_plot_data) +#' +#' @export +process_survfit <- function(fit_km, + strata_levels = "All", + max_time = NULL) { + set_cli_abort_call() + + # Input checks + if (!inherits(fit_km, "survfit")) { + cli::cli_abort( + "The input {.arg fit_km} must be a fitted Kaplan-Meier object of class {.cls survfit}.", + call = get_cli_abort_call() + ) + } + check_string(strata_levels) + check_numeric(max_time, allow_empty = TRUE) + + y <- broom::tidy(fit_km) + + # Handle strata factor levels + if (!is.null(fit_km$strata)) { + fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") + strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) + strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals") + y$strata <- factor( + vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), + levels = strata_levels + ) + } else { + y$strata <- strata_levels + } + + # Extend to time zero + y_by_strata <- split(y, y$strata) + y_by_strata_extended <- lapply( + y_by_strata, + FUN = function(tbl) { + first_row <- tbl[1L, ] + first_row$time <- 0 + first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")]) + first_row$n.event <- first_row$n.censor <- 0 + first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 + first_row$std.error <- 0 + rbind( + first_row, + tbl + ) + } + ) + y <- do.call(rbind, y_by_strata_extended) + + # Censoring points + y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) + + # Apply max_time filter if provided + if (!is.null(max_time)) { + y <- y[y$time <= max(max_time), ] + } + + y +} + + +#' @describeIn gg_km creates a Kaplan-Meier survival curve, with +#' support for various customizations like censoring marks, Confidence Intervals (CIs), and axis control. +#' +#' @param surv_plot_data (`data.frame`)\cr +#' A data frame containing the pre-processed survival data, ready for plotting. +#' This data should be equivalent to the output of `process_survfit`. +#' @param lty (`numeric` or `NULL`)\cr +#' A numeric vector of **line types** (e.g., `1` for solid, `2` for dashed) for the survival curves, or `NULL` for +#' `ggplot2` defaults. The length should match the number of arms/groups. +#' @param lwd (`numeric`)\cr +#' A single numeric value specifying the **line width** for the survival curves. +#' @param censor_show (`logical`)\cr +#' A single logical value indicating whether to display **censoring marks** on the plot. Defaults to `TRUE`. +#' @param size (`numeric`)\cr +#' A single numeric value specifying the **size** of the censoring marks. +#' @param max_time (`numeric`)\cr +#' A single numeric value defining the **maximum time point** to display on the x-axis. +#' @param xticks (`numeric` or `NULL`)\cr +#' A numeric vector of explicit **x-axis tick positions**, or a single numeric value representing the **interval** +#' between ticks, or `NULL` for automatic `ggplot2` scaling. +#' @param yval (`character`)\cr +#' A single character string, either `"Survival"` or `"Failure"` to plot the corresponding probability. +#' @param ylim (`numeric`)\cr +#' A **numeric vector of length 2** defining the lower and upper limits of the y-axis (e.g., `c(0, 1)`). +#' @param font_size (`numeric`)\cr +#' A single numeric value specifying the **base font size** for the plot theme elements. +#' @param legend_pos (`numeric` or `NULL`)\cr +#' A **numeric vector of length 2** defining the **legend position** as (x, y) coordinates relative to the plot +#' area (ranging from 0 to 1), or `NULL` for automatic placement. +#' +#' @return The function `gg_km` returns a `ggplot2` object of the KM plot. +#' +#' @examples +#' # Example of making the KM plot +#' plt_kmg01 <- gg_km(surv_plot_data) +#' +#' # Confidence Interval as Ribbon +#' plt_kmg01 + +#' ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) +#' +#' # Adding Title and Footnotes +#' plt_kmg01 + +#' ggplot2::labs(title = "title", caption = "footnotes") +#' +#' # Changing xlab and ylab +#' plt_kmg01 + +#' ggplot2::xlab("Another Day") + +#' ggplot2::ylab("THE Survival Probability") +#' +#' @export +gg_km <- function(surv_plot_data, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + size = 2, + max_time = NULL, + xticks = NULL, + yval = c("Survival", "Failure"), + ylim = NULL, + font_size = 10, + legend_pos = NULL) { + set_cli_abort_call() + + # Input checks + check_data_frame(surv_plot_data) + needed_cols <- c("time", "estimate", "conf.low", "conf.high", "strata", "n.censor", "censor") + if (!all(needed_cols %in% colnames(surv_plot_data))) { + cli::cli_abort( + "The input {.arg surv_plot_data} must contain the following columns: ", + "{.code time}, {.code estimate}, {.code conf.low}, {.code conf.high}, ", + "{.code strata}, {.code n.censor}, and {.code censor}.", + call = get_cli_abort_call() + ) + } + if (nrow(surv_plot_data) < 1) { + cli::cli_abort( + "The input {.arg surv_plot_data} must contain at least one row of data.", + call = get_cli_abort_call() + ) + } + check_numeric(ylim, allow_empty = TRUE) + check_scalar_logical(censor_show) + + data <- surv_plot_data + strata_levels <- levels(data$strata) + + yval <- match.arg(yval) + if (yval == "Failure") { + data[c("estimate", "conf.low", "conf.high", "censor")] <- list( + 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor + ) + } + + if (is.null(ylim)) { + if (!is.null(max_time)) { + y_lwr <- min(data[data$time < max_time, ][["estimate"]]) + y_upr <- max(data[data$time < max_time, ][["estimate"]]) + } else { + y_lwr <- min(data[["estimate"]]) + y_upr <- max(data[["estimate"]]) + } + ylim <- c(y_lwr, y_upr) + } + + xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) + + gg_plt <- ggplot2::ggplot( + data = data, + mapping = ggplot2::aes( + x = .data[["time"]], y = .data[["estimate"]], ymin = .data[["conf.low"]], + ymax = .data[["conf.high"]], color = .data[["strata"]], fill = .data[["strata"]] + ) + ) + + ggplot2::theme_bw(base_size = font_size) + + ggplot2::scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + + ggplot2::labs(x = "Days", y = paste(yval, "Probability")) + + ggplot2::theme( + axis.text = ggplot2::element_text(size = font_size), + axis.title = ggplot2::element_text(size = font_size), + legend.title = ggplot2::element_blank(), + legend.text = ggplot2::element_text(size = font_size), + legend.box.background = ggplot2::element_rect(fill = "white", linewidth = 0.5), + legend.background = ggplot2::element_blank(), + legend.position = "inside", + legend.spacing.y = ggplot2::unit(-0.02, "npc"), + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank() + ) + + if (!is.null(max_time) && !is.null(xticks)) { + gg_plt <- gg_plt + ggplot2::scale_x_continuous( + breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) + ) + } else if (!is.null(xticks)) { + if (max(data$time) <= max(xticks)) { + gg_plt <- gg_plt + ggplot2::scale_x_continuous( + breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) + ) + } else { + gg_plt <- gg_plt + ggplot2::scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) + } + } else if (!is.null(max_time)) { + gg_plt <- gg_plt + ggplot2::scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) + } + + if (!is.null(legend_pos)) { + gg_plt <- gg_plt + ggplot2::theme(legend.position.inside = legend_pos) + } else { + max_time2 <- sort( + data$time, + partial = nrow(data) - length(strata_levels) - 1 + )[nrow(data) - length(strata_levels) - 1] + + y_rng <- ylim[2] - ylim[1] + + if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && + all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint + gg_plt <- gg_plt + + ggplot2::theme( + legend.position.inside = c(1, 0.5), + legend.justification = c(1.1, 0.6) + ) + } else { + gg_plt <- gg_plt + + ggplot2::theme( + legend.position.inside = c(1, 0), + legend.justification = c(1.1, -0.4) + ) + } + } + + gg_plt <- if (is.null(lty)) { + gg_plt + ggplot2::geom_step(linewidth = lwd, na.rm = TRUE) + } else if (length(lty) == 1) { + gg_plt + ggplot2::geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) + } else { + gg_plt + + ggplot2::geom_step(ggplot2::aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + + ggplot2::scale_linetype_manual(values = lty) + } + + if (isTRUE(censor_show)) { + gg_plt <- gg_plt + ggplot2::geom_point( + data = data[data$n.censor != 0, ], + ggplot2::aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), + size = size, + na.rm = TRUE + ) + + ggplot2::scale_shape_manual(values = 3) + + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA))) + } + + gg_plt +} diff --git a/R/gg_km_utils.R b/R/gg_km_utils.R new file mode 100644 index 00000000..d6eb431f --- /dev/null +++ b/R/gg_km_utils.R @@ -0,0 +1,184 @@ +#' Convert Data Frame to ggplot2 Table Graphic +#' +#' @description Creates a `ggplot2` object that renders a data frame as a table graphic. +#' +#' @param df The data frame to render. +#' @param colwidths Numeric vector of relative column widths. If \code{NULL}, determined by max character length. +#' @param font_size Numeric base font size. +#' @param col_labels Logical, whether to display column labels (header). +#' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). +#' @param hline Logical, whether to draw a horizontal line below the column labels. +#' @param bg_fill Optional color string for the plot background. +#' @param add_proper_xaxis Logical, whether to add a proper x-axis with column values. +#' +#' @return A \code{ggplot2} object representing the table. +#' +#' @keywords internal +df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL, add_proper_xaxis = FALSE) { + # Convert all values to character, replacing NAs with "NA" + df <- as.data.frame(apply(df, 1:2, function(x) { + if (is.na(x)) { + "NA" + } else { + as.character(x) + } + })) + + # Add column labels as first row if specified + if (col_labels) { + df <- as.matrix(df) + df <- rbind(colnames(df), df) + } + + # Create ggplot2 object with x-axis specified in df + if (add_proper_xaxis) { + # Determine column widths if not provided + if (is.null(colwidths)) { + tot_width <- max(colnames(df) |> as.numeric(), na.rm = TRUE) + colwidths <- rep(floor(tot_width / ncol(df)), ncol(df)) + } else { + tot_width <- sum(colwidths) + } + + df_long <- df |> + as.data.frame() |> + # 1. Ensure the row names ('A', 'B', 'C') are a column named 'row_name' + dplyr::mutate(row_name = row.names(df)) |> + # 2. Pivot the remaining columns (starting from '0' to the end) longer + tidyr::pivot_longer( + cols = -.data$row_name, # Select all columns EXCEPT 'row_name' + names_to = "col_name", # Name the new column containing the old column headers + values_to = "value" # Name the new column containing the data values + ) |> + dplyr::arrange(.data$row_name, .data$col_name) |> + dplyr::mutate( + col_name = as.numeric(.data$col_name), + row_name = factor(.data$row_name, levels = row.names(df)) + ) + res <- ggplot2::ggplot(data = df_long) + + ggplot2::theme_void() + + ggplot2::annotate("text", + x = df_long$col_name, y = rev(df_long$row_name), # why rev? + label = df_long$value, size = font_size / .pt + ) + + # Create ggplot2 object with a specific x-axis based on column widths + } else { + # Determine column widths if not provided + if (is.null(colwidths)) { + colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) + } + tot_width <- sum(colwidths) + + res <- ggplot2::ggplot(data = df) + + ggplot2::theme_void() + + ggplot2::scale_x_continuous(limits = c(0, tot_width)) + + ggplot2::scale_y_continuous(limits = c(1, nrow(df))) + + + for (i in seq_len(ncol(df))) { + line_pos <- c( + if (i == 1) { + 0 + } else { + sum(colwidths[1:(i - 1)]) + }, + sum(colwidths[1:i]) + ) + res <- res + ggplot2::annotate("text", + x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size / .pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } else { + rep("plain", nrow(df)) + } + ) + } + } + + # Add horizontal line if specified + if (hline) { + res <- res + ggplot2::annotate( + "segment", + x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), + y = nrow(df) - 0.5, yend = nrow(df) - 0.5 + ) + } + + # Set background fill if specified + if (!is.null(bg_fill)) { + res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) + } + + res +} + +#' Calculate X-axis Ticks +#' +#' @description Determines the positions for x-axis ticks based on the data and user input. +#' +#' @param data A data frame containing a `"time""` column. +#' @param xticks A numeric vector of specific tick positions, a single number for the interval, or +#' `NULL` for auto-calculation. +#' @param max_time Optional numeric value specifying the maximum time to consider for tick range. +#' +#' @return A numeric vector of x-axis tick positions. +#' +#' @keywords internal +h_xticks <- function(data, xticks = NULL, max_time = NULL) { + if (is.null(xticks)) { + if (is.null(max_time)) { + labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) + } else { + labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) + } + } else if (is.numeric(xticks) && length(xticks) == 1 && !is.na(xticks)) { + if (is.null(max_time)) { + seq(0, max(data$time), xticks) + } else { + seq(0, max(data$time, max_time), xticks) + } + } else if (is.numeric(xticks)) { + xticks + } else { + stop( + paste( + "xticks should be either `NULL`", + "or a single number (interval between x ticks)", + "or a numeric vector (position of ticks on the x axis)" + ) + ) + } +} + +#' @title Median Survival Summary Table +#' +#' @description Extracts and formats the median survival time and its confidence interval +#' from a fitted Kaplan-Meier object. +#' +#' @inheritParams gg_km +#' +#' @return A data frame with columns "N", "Median", and the confidence interval label. +#' +#' @keywords internal +h_tbl_median_surv <- function(fit_km, strata_levels = "All") { + y <- if (is.null(fit_km$strata)) { + as.data.frame(t(summary(fit_km)$table), row.names = strata_levels) + } else { + tbl <- summary(fit_km)$table + rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") + rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] + as.data.frame(tbl) + } + conf.int <- summary(fit_km)$conf.int # nolint + y$records <- round(y$records) + y$median <- signif(y$median, 4) + y$`CI` <- paste0( + "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" + ) + stats::setNames( + y[c("records", "median", "CI")], + c("N", "Median", paste0(conf.int * 100, "% CI")) + ) +} diff --git a/R/tbl_null_report.R b/R/tbl_null_report.R index d7d47cd0..a86c0ba3 100644 --- a/R/tbl_null_report.R +++ b/R/tbl_null_report.R @@ -10,8 +10,7 @@ #' #' @export #' @rdname tbl_null_report -tbl_null_report <- function( - label = "No observations met the reporting criteria for this output.") { +tbl_null_report <- function(label = "No observations met the reporting criteria for this output.") { set_cli_abort_call() # Check input label ---------------------------------------------------------- diff --git a/_pkgdown.yml b/_pkgdown.yml index c850f898..3015c9d7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -41,3 +41,9 @@ reference: - modify_zero_recode - add_blank_rows - label_roche + - title: "Kaplan-Meier Plot" + contents: + - process_survfit + - gg_km + - annotate_gg_km + - get_cox_pairwise_df diff --git a/inst/WORDLIST b/inst/WORDLIST index b95a577a..123b296b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -8,16 +8,20 @@ Kaplan Ns ORCID RStudio +Recode Rua SOCs Tidyverse cardx +customizations de flextable funder +ggplot gtsummary pharma pre +recodes rlang's survfit tbl diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd new file mode 100644 index 00000000..f25d6668 --- /dev/null +++ b/man/annotate_gg_km.Rd @@ -0,0 +1,116 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/annotate_gg_km.R +\name{annotate_gg_km} +\alias{annotate_gg_km} +\alias{annotate_riskdf} +\alias{annotate_surv_med} +\alias{annotate_coxph} +\title{Annotate Kaplan-Meier Plot} +\usage{ +annotate_riskdf( + gg_plt, + fit_km, + title = "Patients at Risk:", + rel_height_plot = 0.75, + xlab = "Days", + ... +) + +annotate_surv_med(gg_plt, fit_km, ...) + +annotate_coxph(gg_plt, coxph_tbl, ...) +} +\arguments{ +\item{gg_plt}{(\code{ggplot2} or \code{cowplot})\cr +The primary plot object (either a \code{ggplot2} or \code{cowplot} object) of the Kaplan-Meier plot.} + +\item{fit_km}{(\code{survfit})\cr +A fitted Kaplan-Meier object of class \code{survfit} (from the \code{survival} package). This object contains the necessary +survival data used to calculate and generate the content displayed in the annotation table.} + +\item{title}{(\code{string})\cr +A single logical value indicating whether to include a above the table. Defaults to +\verb{""Patients at Risk:""}. If \code{NULL}, no title is added.} + +\item{rel_height_plot}{(\code{numeric})\cr +A single numeric value defining the \strong{relative height} of the main Kaplan-Meier plot area compared +to the 'at-risk' table. This value should be between 0 and 1, where a value closer to 1 gives the main plot +more vertical space. Defaults to \code{0.75}.} + +\item{xlab}{(\code{character})\cr +A single character string for the \strong{x-axis label} on the 'at-risk' table. This typically represents +time (e.g., "Time (Days)").} + +\item{...}{Additional arguments passed to the control list for the annotation box. +These arguments override the default values. +Accepted arguments include: +\itemize{ +\item \code{x} (\code{numeric}): X-coordinate for the box anchor position (0 to 1). Default is \code{0.29}. +\item \code{y} (\code{numeric}): Y-coordinate for the box anchor position (0 to 1). Default is \code{0.51}. +\item \code{w} (\code{numeric}): Width of the annotation box (0 to 1). Default is \code{0.4}. +\item \code{h} (\code{numeric}): Height of the annotation box (0 to 1). Default is \code{0.125}. +}} + +\item{coxph_tbl}{(\code{data.frame})\cr +A data frame containing the pre-calculated Cox-PH results, typically from a function like \code{get_cox_pairwise_df}. +This data is used to generate the annotation table content.} +} +\value{ +The function \code{annotate_riskdf} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' +table. + +The function \code{annotate_surv_med} returns a \code{cowplot} object with the median survival table annotation +added, ready for final display or saving. + +The function \code{annotate_coxph} returns a \code{cowplot} object with the Cox-PH table annotation added. +} +\description{ +These functions provide capabilities to annotate Kaplan-Meier plots (\code{\link[=gg_km]{gg_km()}}) with additional summary tables, +including median survival times, numbers at risk, and cox proportional hazards results. +The annotations are added using the \code{cowplot} package for flexible placement. +} +\section{Functions}{ +\itemize{ +\item \code{annotate_riskdf()}: The function \code{annotate_riskdf} adds a "Numbers at Risk" table below a +Kaplan-Meier plot (\code{\link[=gg_km]{gg_km()}}) using \code{cowplot::plot_grid}. + +\item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a median survival time summary table as an +annotation box. + +\item \code{annotate_coxph()}: The function \code{annotate_coxph()} adds a Cox Proportional Hazards summary table created by +the function \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} as an annotation box. + +}} +\examples{ +# Preparing the Kaplan-Meier Plot +use_lung <- survival::lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) + +formula <- survival::Surv(time, status) ~ arm +fit_kmg01 <- survival::survfit(formula, use_lung) +surv_plot_data <- process_survfit(fit_kmg01) + +plt_kmg01 <- gg_km(surv_plot_data) + +# Annotate Plot with Numbers at Risk Table +annotate_riskdf(plt_kmg01, fit_kmg01) + +# Change order of y-axis (arm) +use_lung2 <- use_lung +use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) +fit_kmg01 <- survival::survfit(formula, use_lung2) +annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order + +# Annotate Kaplan-Meier Plot with Median Survival Table +annotate_surv_med(plt_kmg01, fit_kmg01) + +# Annotate Kaplan-Meier Plot with Cox-PH Table +coxph_tbl <- get_cox_pairwise_df(formula, data = use_lung, arm = "arm", ref_group = "A") +annotate_coxph(plt_kmg01, coxph_tbl) + +} +\seealso{ +\code{\link[=gg_km]{gg_km()}}, \code{\link[=process_survfit]{process_survfit()}}, and \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} for related functionalities. +} diff --git a/man/df2gg.Rd b/man/df2gg.Rd new file mode 100644 index 00000000..ace83168 --- /dev/null +++ b/man/df2gg.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gg_km_utils.R +\name{df2gg} +\alias{df2gg} +\title{Convert Data Frame to ggplot2 Table Graphic} +\usage{ +df2gg( + df, + colwidths = NULL, + font_size = 10, + col_labels = TRUE, + col_lab_fontface = "bold", + hline = TRUE, + bg_fill = NULL, + add_proper_xaxis = FALSE +) +} +\arguments{ +\item{df}{The data frame to render.} + +\item{colwidths}{Numeric vector of relative column widths. If \code{NULL}, determined by max character length.} + +\item{font_size}{Numeric base font size.} + +\item{col_labels}{Logical, whether to display column labels (header).} + +\item{col_lab_fontface}{Character string for the font face of column labels (e.g., "bold").} + +\item{hline}{Logical, whether to draw a horizontal line below the column labels.} + +\item{bg_fill}{Optional color string for the plot background.} + +\item{add_proper_xaxis}{Logical, whether to add a proper x-axis with column values.} +} +\value{ +A \code{ggplot2} object representing the table. +} +\description{ +Creates a \code{ggplot2} object that renders a data frame as a table graphic. +} +\keyword{internal} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd new file mode 100644 index 00000000..beaad285 --- /dev/null +++ b/man/get_cox_pairwise_df.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cox_pairwise_df.R +\name{get_cox_pairwise_df} +\alias{get_cox_pairwise_df} +\title{Generate Table of Pairwise Cox-PH and Log-Rank Results} +\usage{ +get_cox_pairwise_df(model_formula, data, arm, ref_group = NULL) +} +\arguments{ +\item{model_formula}{(\code{formula})\cr +A \code{formula} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} + +\item{data}{(\code{data.frame})\cr +A \code{data.frame} containing the survival data, including time, status, and the arm variable.} + +\item{arm}{(\code{character})\cr +A single character string specifying the name of the column in \code{data} that contains the grouping/treatment +\strong{arm variable}. This column \strong{must be a factor} for correct stratification and comparison.} + +\item{ref_group}{(\code{character} or \code{NULL})\cr +A single character string specifying the level of the \code{arm} variable to be used as the \strong{reference group} for +all pairwise comparisons. If \code{NULL} (the default), the \strong{first unique level} of the \code{arm} column is automatically +selected as the reference group.} +} +\value{ +A \code{data.frame} with the results of the pairwise comparisons. The columns include: +\itemize{ +\item \code{arm}: (rownames of the \code{data.frame}) The comparison arm (group) being tested against the reference group. +\item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +\item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with +values formatted to two decimal places. +\item \code{pval}: The log-rank p-value for the comparison. +} +} +\description{ +This function performs pairwise comparisons of treatment arms using the \strong{Cox Proportional Hazards model} and +calculates the corresponding \strong{log-rank p-value}. Each comparison tests a non-reference group against a specified +reference group. +} +\details{ +The function iterates through each unique arm (excluding the reference group). For each iteration, it +filters the data to include only the current comparison arm and the reference arm, and then: +\itemize{ +\item Fits a Cox model using \code{survival::coxph}. +\item Performs a log-rank test using \code{survival::survdiff}. +} +The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is +extracted from the log-rank test. +} +\examples{ +# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +# and 'arm' is the treatment group) +library(dplyr) # For better data handling + +# Prepare data in a modern dplyr-friendly way +surv_data <- survival::lung |> + mutate( + arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), + status = status - 1 # Convert status to 0/1 + ) |> + filter(if_all(everything(), ~ !is.na(.))) + +formula <- survival::Surv(time, status) ~ arm +results_tbl <- get_cox_pairwise_df( + model_formula = formula, + data = surv_data, + arm = "arm", + ref_group = "A" +) +print(results_tbl) + +} +\seealso{ +\code{annotate_gg_km()}, \code{gg_km()}, and the \code{survival} package functions \code{survival::coxph} and +\code{survival::survdiff}. +} diff --git a/man/gg_km.Rd b/man/gg_km.Rd new file mode 100644 index 00000000..1583f690 --- /dev/null +++ b/man/gg_km.Rd @@ -0,0 +1,125 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gg_km.R +\name{gg_km} +\alias{gg_km} +\alias{process_survfit} +\title{Kaplan-Meier Plot} +\usage{ +process_survfit(fit_km, strata_levels = "All", max_time = NULL) + +gg_km( + surv_plot_data, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + size = 2, + max_time = NULL, + xticks = NULL, + yval = c("Survival", "Failure"), + ylim = NULL, + font_size = 10, + legend_pos = NULL +) +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{strata_levels}{(\code{string})\cr +A single character string used as the strata level if the input \code{fit_km} object +has no strata (e.g., \code{"All"}).} + +\item{max_time}{(\code{numeric})\cr +A single numeric value defining the \strong{maximum time point} to display on the x-axis.} + +\item{surv_plot_data}{(\code{data.frame})\cr +A data frame containing the pre-processed survival data, ready for plotting. +This data should be equivalent to the output of \code{process_survfit}.} + +\item{lty}{(\code{numeric} or \code{NULL})\cr +A numeric vector of \strong{line types} (e.g., \code{1} for solid, \code{2} for dashed) for the survival curves, or \code{NULL} for +\code{ggplot2} defaults. The length should match the number of arms/groups.} + +\item{lwd}{(\code{numeric})\cr +A single numeric value specifying the \strong{line width} for the survival curves.} + +\item{censor_show}{(\code{logical})\cr +A single logical value indicating whether to display \strong{censoring marks} on the plot. Defaults to \code{TRUE}.} + +\item{size}{(\code{numeric})\cr +A single numeric value specifying the \strong{size} of the censoring marks.} + +\item{xticks}{(\code{numeric} or \code{NULL})\cr +A numeric vector of explicit \strong{x-axis tick positions}, or a single numeric value representing the \strong{interval} +between ticks, or \code{NULL} for automatic \code{ggplot2} scaling.} + +\item{yval}{(\code{character})\cr +A single character string, either \code{"Survival"} or \code{"Failure"} to plot the corresponding probability.} + +\item{ylim}{(\code{numeric})\cr +A \strong{numeric vector of length 2} defining the lower and upper limits of the y-axis (e.g., \code{c(0, 1)}).} + +\item{font_size}{(\code{numeric})\cr +A single numeric value specifying the \strong{base font size} for the plot theme elements.} + +\item{legend_pos}{(\code{numeric} or \code{NULL})\cr +A \strong{numeric vector of length 2} defining the \strong{legend position} as (x, y) coordinates relative to the plot +area (ranging from 0 to 1), or \code{NULL} for automatic placement.} +} +\value{ +The function \code{process_survfit} returns a data frame containing the survival +curve steps, confidence intervals, and censoring info. + +The function \code{gg_km} returns a \code{ggplot2} object of the KM plot. +} +\description{ +This set of functions facilitates the creation of Kaplan-Meier survival plots using \code{ggplot2}. Use +\code{process_survfit()} to prepare the survival data from a fitted \code{survfit} object, and then +\code{gg_km()} to generate the Kaplan-Meier plot with various customization options. Additional functions +like \code{annot_surv_med()}, \code{annot_cox_ph()}, and \code{annotate_riskdf()} allow for adding summary tables and +annotations to the plot. +} +\details{ +Data setup assumes \code{"time"} is event time, \code{"status"} is event indicator (\code{1} represents an event), +while \code{"arm"} is the treatment group. +} +\section{Functions}{ +\itemize{ +\item \code{process_survfit()}: takes a fitted \link[survival:survfit]{survival::survfit} object and processes it into a data frame +suitable for plotting a Kaplan-Meier curve with \code{ggplot2}. Time zero is also added to the data. + +\item \code{gg_km()}: creates a Kaplan-Meier survival curve, with +support for various customizations like censoring marks, Confidence Intervals (CIs), and axis control. + +}} +\examples{ +# Data preparation for KM plot +use_lung <- survival::lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) + +# Fit Kaplan-Meier model +formula <- survival::Surv(time, status) ~ arm +fit_kmg01 <- survival::survfit(formula, use_lung) + +# Process survfit data for plotting +surv_plot_data <- process_survfit(fit_kmg01) +head(surv_plot_data) + +# Example of making the KM plot +plt_kmg01 <- gg_km(surv_plot_data) + +# Confidence Interval as Ribbon +plt_kmg01 + + ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) + +# Adding Title and Footnotes +plt_kmg01 + + ggplot2::labs(title = "title", caption = "footnotes") + +# Changing xlab and ylab +plt_kmg01 + + ggplot2::xlab("Another Day") + + ggplot2::ylab("THE Survival Probability") + +} diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd new file mode 100644 index 00000000..17812e95 --- /dev/null +++ b/man/h_tbl_median_surv.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gg_km_utils.R +\name{h_tbl_median_surv} +\alias{h_tbl_median_surv} +\title{Median Survival Summary Table} +\usage{ +h_tbl_median_surv(fit_km, strata_levels = "All") +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{strata_levels}{(\code{string})\cr +A single character string used as the strata level if the input \code{fit_km} object +has no strata (e.g., \code{"All"}).} +} +\value{ +A data frame with columns "N", "Median", and the confidence interval label. +} +\description{ +Extracts and formats the median survival time and its confidence interval +from a fitted Kaplan-Meier object. +} +\keyword{internal} diff --git a/man/h_xticks.Rd b/man/h_xticks.Rd new file mode 100644 index 00000000..e61f4a50 --- /dev/null +++ b/man/h_xticks.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gg_km_utils.R +\name{h_xticks} +\alias{h_xticks} +\title{Calculate X-axis Ticks} +\usage{ +h_xticks(data, xticks = NULL, max_time = NULL) +} +\arguments{ +\item{data}{A data frame containing a \verb{"time""} column.} + +\item{xticks}{A numeric vector of specific tick positions, a single number for the interval, or +\code{NULL} for auto-calculation.} + +\item{max_time}{Optional numeric value specifying the maximum time to consider for tick range.} +} +\value{ +A numeric vector of x-axis tick positions. +} +\description{ +Determines the positions for x-axis ticks based on the data and user input. +} +\keyword{internal} diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R new file mode 100644 index 00000000..9b8e392e --- /dev/null +++ b/tests/testthat/test-gg_km.R @@ -0,0 +1,62 @@ +test_that("gg_km() works with default inputs", { + anl <- cards::ADTTE |> + dplyr::mutate(is_event = CNSR == 0) + by <- "TRTP" + anl[[by]] <- factor(anl[[by]], levels = c( + "Placebo", + "Xanomeline Low Dose", + "Xanomeline High Dose" + )) + group_sym <- rlang::sym(by) + model_formula <- rlang::new_formula( + lhs = rlang::expr(Surv(AVAL, is_event)), + rhs = rlang::expr(!!group_sym) + ) + + fit_kmg01 <- survival::survfit(model_formula, anl) + + expect_no_error( + surv_plot_data <- process_survfit(fit_kmg01) + ) + + expect_no_error( + suppressWarnings( + coxph_tbl <- get_cox_pairwise_df( + model_formula, + data = anl, + arm = by + ) + ) + ) + + expect_no_error( + plt_kmg01 <- gg_km(surv_plot_data) |> + annotate_surv_med(fit_kmg01) |> + annotate_coxph(coxph_tbl) |> + annotate_riskdf(fit_kmg01) + ) +}) + +test_that("df2gg() works with proper x-axis and without", { + # Example using proper x-axis + df <- as.data.frame(matrix(c( + # 0, 250, 500, 750, 1000 <-- (Reference) + 54, 28, 10, 3, 0, + 59, 35, 16, 5, 1, + 54, 25, 4, 0, 0 + ), nrow = 3, byrow = TRUE)) + + # Set names manually + colnames(df) <- c("0", "250", "500", "750", "1000") + rownames(df) <- c("A", "B", "C") + + # Example with proper x-axis + expect_no_error( + null <- df2gg(df, font_size = 8, add_proper_xaxis = TRUE) + ) + + # Example without proper x-axis + expect_no_error( + null <- df2gg(df, font_size = 8, add_proper_xaxis = FALSE, hline = FALSE) + ) +})