|
| 1 | + |
| 2 | +#' Do Sankey or Alluvial plots. |
| 3 | +#' |
| 4 | +#' @inheritParams doc_function |
| 5 | +#' @param first_group \strong{\code{\link[base]{character}}} | Categorical metadata variable. First group of nodes of the sankey plot. |
| 6 | +#' @param last_group \strong{\code{\link[base]{character}}} | Categorical metadata variable. Last group of nodes of the sankey plot. |
| 7 | +#' @param middle_groups \strong{\code{\link[base]{character}}} | Categorical metadata variable. Vector of groups of nodes of the sankey plot. |
| 8 | +#' @param type \strong{\code{\link[base]{character}}} | Type of plot to make. One of: |
| 9 | +#' \itemize{ |
| 10 | +#' \item \emph{\code{sankey}}: Generates a sankey plot. |
| 11 | +#' \item \emph{\code{alluvial}}: Generated an Alluvial plot, a kind of sankey plot where all groups have the same height. |
| 12 | +#' } |
| 13 | +#' @param width \strong{\code{\link[base]{numeric}}} | Width of the nodes. |
| 14 | +#' @param space \strong{\code{\link[base]{numeric}}} | Vertical space between the nodes. It appears to be equal to a single cell. Use big numbers to see a difference (like, 1000 or 10000). |
| 15 | +#' @param position \strong{\code{\link[base]{character}}} | GGplot2 position. |
| 16 | +#' @param node.fill \strong{\code{\link[base]{character}}} | Color to fill the nodes. |
| 17 | +#' @param node.color \strong{\code{\link[base]{character}}} | Color for the contour of the nodes. |
| 18 | +#' @param flow.alpha \strong{\code{\link[base]{character}}} | Alpha of the connections. |
| 19 | +#' @param flow.color \strong{\code{\link[base]{character}}} | Color for the contour of the connections. |
| 20 | +#' @param text_size \strong{\code{\link[base]{numeric}}} | Size of the labels. |
| 21 | +#' @param text_color \strong{\code{\link[base]{character}}} | Color of the labels. |
| 22 | +#' @param smooth \strong{\code{\link[base]{numeric}}} | How smooth the connections are. |
| 23 | +#' @param colors.first,colors.middle,colors.last \strong{\code{\link[base]{character}}} | Named vector of colors equal to ALL unique values in first_group, middle_groups, or last_group. |
| 24 | +#' @param use_labels \strong{\code{\link[base]{logical}}} | Whether to use labels or text for the node names. |
| 25 | +#' @param hjust \strong{\code{\link[base]{numeric}}} | General hjust for the labels. |
| 26 | +#' |
| 27 | +#' @return A ggplot2 object. |
| 28 | +#' @export |
| 29 | +#' |
| 30 | +#' @example /man/examples/examples_do_SankeyPlot.R |
| 31 | +do_SankeyPlot <- function(sample, |
| 32 | + first_group, |
| 33 | + last_group, |
| 34 | + type = "sankey", |
| 35 | + middle_groups = NULL, |
| 36 | + width = 0.1, |
| 37 | + space = ifelse(type == "sankey", 0.05 * ncol(sample), 0), |
| 38 | + position = "identity", |
| 39 | + node.fill = "white", |
| 40 | + node.color = "white", |
| 41 | + flow.alpha = 0.75, |
| 42 | + flow.color = "black", |
| 43 | + text_size = 3, |
| 44 | + text_color = "black", |
| 45 | + font.size = 14, |
| 46 | + font.type = "sans", |
| 47 | + smooth = 8, |
| 48 | + use_labels = FALSE, |
| 49 | + hjust = NULL, |
| 50 | + colors.first = NULL, |
| 51 | + colors.middle = NULL, |
| 52 | + colors.last = NULL, |
| 53 | + plot.title = NULL, |
| 54 | + plot.subtitle = NULL, |
| 55 | + plot.caption = NULL){ |
| 56 | + |
| 57 | + # Checks for packages. |
| 58 | + check_suggests(function_name = "do_SankeyPlot") |
| 59 | + # Check if the sample provided is a Seurat object. |
| 60 | + check_Seurat(sample = sample) |
| 61 | + |
| 62 | + # Check logical parameters. |
| 63 | + logical_list <- list("use_labels" = use_labels) |
| 64 | + check_type(parameters = logical_list, required_type = "logical", test_function = is.logical) |
| 65 | + # Check numeric parameters. |
| 66 | + numeric_list <- list("width" = width, |
| 67 | + "space" = space, |
| 68 | + "flow.alpha" = flow.alpha, |
| 69 | + "text_size" = text_size, |
| 70 | + "font.size" = font.size, |
| 71 | + "smooth" = smooth, |
| 72 | + "hjust" = hjust) |
| 73 | + check_type(parameters = numeric_list, required_type = "numeric", test_function = is.numeric) |
| 74 | + # Check character parameters. |
| 75 | + |
| 76 | + character_list <- list("first_group" = first_group, |
| 77 | + "last_group" = last_group, |
| 78 | + "middle_groups" = middle_groups, |
| 79 | + "type" = type, |
| 80 | + "position" = position, |
| 81 | + "node.color" = node.color, |
| 82 | + "flow.color" = flow.color, |
| 83 | + "text_color" = text_color, |
| 84 | + "font.type" = font.type, |
| 85 | + "colors.first" = colors.first, |
| 86 | + "colors.middle" = colors.middle, |
| 87 | + "colors.last" = colors.last, |
| 88 | + "node.fill" = node.fill, |
| 89 | + "plot.title" = plot.title, |
| 90 | + "plot.subtitle" = plot.subtitle, |
| 91 | + "plot.caption" = plot.caption) |
| 92 | + # Checks |
| 93 | + check_type(parameters = character_list, required_type = "character", test_function = is.character) |
| 94 | + |
| 95 | + check_colors(node.color, parameter_name = "node.color") |
| 96 | + check_colors(flow.color, parameter_name = "flow.color") |
| 97 | + check_colors(text_color, parameter_name = "text_color") |
| 98 | + |
| 99 | + check_parameters(parameter = font.type, parameter_name = "font.type") |
| 100 | + |
| 101 | + # Wrong type. |
| 102 | + assertthat::assert_that(type %in% c("alluvial", "sankey"), |
| 103 | + msg = "Please provide either sankey or alluvial to type.") |
| 104 | + |
| 105 | + # Wrong position. |
| 106 | + assertthat::assert_that(position %in% c("identity", "nudge"), |
| 107 | + msg = "This position type has not been tested.") |
| 108 | + |
| 109 | + # Not a metadata column. |
| 110 | + assertthat::assert_that(first_group %in% colnames(sample@meta.data), |
| 111 | + msg = "The metadata variable for first_group is not in the metadata of the object.") |
| 112 | + |
| 113 | + assertthat::assert_that(class(sample@meta.data[, first_group]) %in% c("character", "factor"), |
| 114 | + msg = "The metadata variable for first_group has to be either a character vector or a factor.") |
| 115 | + |
| 116 | + assertthat::assert_that(last_group %in% colnames(sample@meta.data), |
| 117 | + msg = "The metadata variable for last_group is not in the metadata of the object.") |
| 118 | + |
| 119 | + assertthat::assert_that(class(sample@meta.data[, last_group]) %in% c("character", "factor"), |
| 120 | + msg = "The metadata variable for last_group has to be either a character vector or a factor.") |
| 121 | + |
| 122 | + |
| 123 | + for (var in middle_groups){ |
| 124 | + assertthat::assert_that(var %in% colnames(sample@meta.data), |
| 125 | + msg = "The metadata variable for middle_groups is not in the metadata of the object.") |
| 126 | + |
| 127 | + assertthat::assert_that(class(sample@meta.data[, var]) %in% c("character", "factor"), |
| 128 | + msg = "The metadata variable for middle_groups has to be either a character vector or a factor.") |
| 129 | + } |
| 130 | + |
| 131 | + |
| 132 | + `%>%` <- magrittr::`%>%` |
| 133 | + |
| 134 | + data <- suppressWarnings({sample@meta.data %>% |
| 135 | + dplyr::select(dplyr::all_of(c(first_group, middle_groups, last_group))) %>% |
| 136 | + tibble::rownames_to_column(var = "cell") %>% |
| 137 | + dplyr::select(-.data$cell) %>% |
| 138 | + ggsankey::make_long(dplyr::all_of(c(first_group, middle_groups, last_group))) %>% |
| 139 | + dplyr::rowwise() %>% |
| 140 | + dplyr::mutate(hjust = if(.data$x %in% middle_groups){0.5} |
| 141 | + else if (.data$x == last_group){0} |
| 142 | + else if (.data$x == first_group){1})}) |
| 143 | + if (!is.null(hjust)){data$hjust <- hjust} |
| 144 | + |
| 145 | + if (!(is.null(colors.first))){ |
| 146 | + check_colors(colors.first, parameter_name = "colors.first") |
| 147 | + if (sum(names(colors.first) %!in% unique(sample@meta.data[, first_group])) > 0){ |
| 148 | + stop("Not all colors provided for the first group match the unique values for first_group.", call. = FALSE) |
| 149 | + } |
| 150 | + |
| 151 | + if (length(colors.first) != length(unique(sample@meta.data[, first_group]))){ |
| 152 | + stop("The colors provided for the first group do not match the number of unique nodes.", call. = FALSE) |
| 153 | + } |
| 154 | + } else { |
| 155 | + colors.first <- viridis::viridis(n = length(unique(sample@meta.data[, first_group])), option = "G") |
| 156 | + if (is.factor(sample@meta.data[, first_group])){ |
| 157 | + names(colors.first) <- levels(sample@meta.data[, first_group]) |
| 158 | + } else { |
| 159 | + names(colors.first) <- sort(unique(sample@meta.data[, first_group])) |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + if (!(is.null(colors.last))){ |
| 164 | + check_colors(colors.last, parameter_name = "colors.last") |
| 165 | + if (sum(names(colors.last) %!in% unique(sample@meta.data[, last_group])) > 0){ |
| 166 | + stop("Not all colors provided for the last group match the unique values for last_group", call. = FALSE) |
| 167 | + } |
| 168 | + |
| 169 | + if (length(colors.last) != length(unique(sample@meta.data[, last_group]))){ |
| 170 | + stop("The colors provided for the last group do not match the number of unique nodes.", call. = FALSE) |
| 171 | + } |
| 172 | + } else{ |
| 173 | + colors.last <- viridis::viridis(n = length(unique(sample@meta.data[, last_group])), option = "D") |
| 174 | + if (is.factor(sample@meta.data[, last_group])){ |
| 175 | + names(colors.last) <- levels(sample@meta.data[, last_group]) |
| 176 | + } else { |
| 177 | + names(colors.last) <- sort(unique(sample@meta.data[, last_group])) |
| 178 | + } |
| 179 | + } |
| 180 | + |
| 181 | + if (!(is.null(colors.middle))){ |
| 182 | + check_colors(colors.middle, parameter_name = "colors.middle") |
| 183 | + |
| 184 | + unique_middle_values <- c() |
| 185 | + for(var in middle_groups){ |
| 186 | + if (is.factor(sample@meta.data[, var])){ |
| 187 | + unique_middle_values <- c(unique_middle_values, levels(sample@meta.data[, var])) |
| 188 | + } else { |
| 189 | + unique_middle_values <- c(unique_middle_values, sort(unique(sample@meta.data[, var]))) |
| 190 | + } |
| 191 | + } |
| 192 | + |
| 193 | + if (sum(names(colors.middle) %!in% unique_middle_values) > 0){ |
| 194 | + stop("Not all colors provided for the middle groups match the unique values for middle_groups", call. = FALSE) |
| 195 | + } |
| 196 | + |
| 197 | + if (length(colors.middle) != length(unique_middle_values)){ |
| 198 | + stop("The colors provided for the middle groups do not match the number of unique nodes.", call. = FALSE) |
| 199 | + } |
| 200 | + } else { |
| 201 | + unique_middle_values <- c() |
| 202 | + for(var in middle_groups){ |
| 203 | + if (is.factor(sample@meta.data[, var])){ |
| 204 | + unique_middle_values <- c(unique_middle_values, levels(sample@meta.data[, var])) |
| 205 | + } else { |
| 206 | + unique_middle_values <- c(unique_middle_values, sort(unique(sample@meta.data[, var]))) |
| 207 | + } |
| 208 | + } |
| 209 | + |
| 210 | + colors.middle <- viridis::viridis(n = length(unique_middle_values), option = "C") |
| 211 | + names(colors.middle) <- unique_middle_values |
| 212 | + } |
| 213 | + |
| 214 | + colors.use <- c(colors.first, colors.middle, colors.last) |
| 215 | + func_use <- ifelse(isTRUE(use_labels), ggsankey::geom_sankey_label, ggsankey::geom_sankey_text) |
| 216 | + |
| 217 | + p <- data %>% |
| 218 | + |
| 219 | + ggplot2::ggplot(mapping = ggplot2::aes(x = .data$x, |
| 220 | + next_x = .data$next_x, |
| 221 | + node = .data$node, |
| 222 | + next_node = .data$next_node, |
| 223 | + fill = factor(.data$node), |
| 224 | + label = .data$node, |
| 225 | + hjust = .data$hjust)) + |
| 226 | + ggsankey::geom_sankey(flow.alpha = flow.alpha, |
| 227 | + node.color = node.color, |
| 228 | + node.fill = node.fill, |
| 229 | + color = flow.color, |
| 230 | + width = width, |
| 231 | + position = position, |
| 232 | + type = type, |
| 233 | + space = space) + |
| 234 | + func_use(size = text_size, |
| 235 | + color = text_color, |
| 236 | + fontface = "bold", |
| 237 | + position = position, |
| 238 | + type = type, |
| 239 | + space = space) + |
| 240 | + ggplot2::scale_fill_manual(values = colors.use) + |
| 241 | + ggplot2::xlab("") + |
| 242 | + ggplot2::ylab("") + |
| 243 | + ggplot2::labs(title = plot.title, |
| 244 | + subtitle = plot.subtitle, |
| 245 | + caption = plot.caption) + |
| 246 | + ggplot2::theme_minimal(base_size = font.size) + |
| 247 | + ggplot2::theme(axis.title = ggplot2::element_text(color = "black", |
| 248 | + face = "bold"), |
| 249 | + axis.line.x = ggplot2::element_blank(), |
| 250 | + axis.text.x = ggplot2::element_text(color = "black", |
| 251 | + face = "bold", |
| 252 | + angle = 0, |
| 253 | + hjust = 0.5, |
| 254 | + vjust = 1), |
| 255 | + axis.text.x.top = ggplot2::element_text(color = "black", |
| 256 | + face = "bold", |
| 257 | + angle = 0, |
| 258 | + hjust = 0.5, |
| 259 | + vjust = 1), |
| 260 | + axis.text.y = ggplot2::element_blank(), |
| 261 | + axis.ticks = ggplot2::element_blank(), |
| 262 | + panel.grid.major = ggplot2::element_blank(), |
| 263 | + plot.title.position = "plot", |
| 264 | + plot.title = ggplot2::element_text(face = "bold", hjust = 0), |
| 265 | + plot.subtitle = ggplot2::element_text(hjust = 0), |
| 266 | + plot.caption = ggplot2::element_text(hjust = 1), |
| 267 | + panel.grid = ggplot2::element_blank(), |
| 268 | + text = ggplot2::element_text(family = font.type), |
| 269 | + plot.caption.position = "plot", |
| 270 | + legend.text = ggplot2::element_text(face = "bold"), |
| 271 | + legend.position = "none", |
| 272 | + legend.title = ggplot2::element_text(face = "bold"), |
| 273 | + legend.justification = "center", |
| 274 | + plot.margin = ggplot2::margin(t = 10, r = 10, b = 10, l = 10), |
| 275 | + plot.background = ggplot2::element_rect(fill = "white", color = "white"), |
| 276 | + panel.background = ggplot2::element_rect(fill = "white", color = "white"), |
| 277 | + legend.background = ggplot2::element_rect(fill = "white", color = "white"), |
| 278 | + strip.text =ggplot2::element_text(color = "black", face = "bold")) |
| 279 | + |
| 280 | + return(p) |
| 281 | +} |
0 commit comments