PharmacoGenomics_0123 (#304)

* PharmacoGenomics_0123
  Ref: https://github.com/mugpeng/OmicsPharDB/
* update zenodo link
* 0124_1st
* 0202 use spearman cor, and log2(Odds)
* use appdata & PGdata indeps

---------

Co-authored-by: ShixiangWang <[email protected]>
openbiox · Feb 13, 2024 · a3c5a5c · a3c5a5c
1 parent 50ae5ae
commit a3c5a5c
Show file tree

Hide file tree

Showing 14 changed files with 1,228 additions and 191 deletions.
diff --git a/.gitignore b/.gitignore
@@ -69,3 +69,5 @@ inst/extdata/ccle_drug_response_extend.rda
 inst/extdata/pancancer_conserved_immune_subtype.rda
 inst/extdata/toil_sig_score.rda
 docs
+
+inst/extdata/OP*
diff --git a/R/load_data.R b/R/load_data.R
@@ -23,7 +23,7 @@
 #'   - `TCGA.organ`: TCGA organ data.
 #'   - `toil_info`: Toil hub information.
 #'
-#' **Remote datasets stored in [Zenodo](https://zenodo.org/record/10340116/)**:
+#' **Remote datasets stored in [Zenodo](https://zenodo.org/record/10554197/)**:
 #'   - `pcawg_promoter_id`: PCAWG promoter identifiers.
 #'   - `transcript_identifier`: Common transcript identifiers.
 #'   - `ccle_expr_and_drug_response`: CCLE expression and drug response data.
@@ -79,7 +79,7 @@ load_data <- function(name) {
     if (!file.exists(data_path)) {
       # Download it to inst/extdata from zenodo
       # Then load it
-      data_url <- file.path("https://zenodo.org/record/10340116/files", name2)
+      data_url <- file.path("https://zenodo.org/record/10554197/files", name2)
       if (tryCatch(httr::status_code(httr::HEAD(data_url)), 
                    error = function(e) 404) != 200) {
         # Use an alternative OSS approach

diff --git a/inst/shinyapp/App.R b/inst/shinyapp/App.R
@@ -74,6 +74,7 @@ pacman::p_load(
   patchwork,
   ggpubr,
   plotly,
+  UpSetR,
   UCSCXenaTools,
   UCSCXenaShiny,
   shiny,
@@ -108,191 +109,18 @@ options(shiny.maxRequestSize=1024*1024^2)
 message("Starting...")
 
 # Put data here -----------------------------------------------------------
-data("XenaData", package = "UCSCXenaTools", envir = environment())
-xena_table <- XenaData[, c(
-  "XenaDatasets", "XenaHostNames", "XenaCohorts",
-  "SampleCount", "DataSubtype", "Label", "Unit"
-)]
-xena_table$SampleCount <- as.integer(xena_table$SampleCount)
-colnames(xena_table)[c(1:3)] <- c("Dataset ID", "Hub", "Cohort")
-
-# Used in TCGA survival module
-TCGA_datasets <- xena_table %>%
-  dplyr::filter(Hub == "tcgaHub") %>%
-  dplyr::select("Cohort") %>%
-  unique() %>%
-  dplyr::mutate(
-    id = stringr::str_match(Cohort, "\\((\\w+?)\\)")[, 2],
-    des = stringr::str_match(Cohort, "(.*)\\s+\\(")[, 2]
-  ) %>%
-  dplyr::arrange(id)
-
-# Used in genecor and pancan-search-cancer module script
-tcga_cancer_choices <- c(
-  "SKCM", "THCA", "SARC", "PRAD", "PCPG", "PAAD", "HNSC", "ESCA",
-  "COAD", "CESC", "BRCA", "TGCT", "KIRP", "KIRC", "LAML", "READ",
-  "OV", "LUAD", "LIHC", "UCEC", "GBM", "LGG", "UCS", "THYM", "STAD",
-  "DLBC", "LUSC", "MESO", "KICH", "UVM", "BLCA", "CHOL", "ACC"
-)
-
-TCGA_cli_merged <- dplyr::full_join(
-  load_data("tcga_clinical"),
-  load_data("tcga_surv"),
-  by = "sample"
-)
-
-pancan_identifiers <- readRDS(
-  system.file(
-    "extdata", "pancan_identifier_list.rds",
-    package = "UCSCXenaShiny"
-  )
-)
-all_preload_identifiers <- c("NONE", unique(as.character(unlist(pancan_identifiers))))
-tryCatch(
-  load_data("transcript_identifier"),
-  error = function(e) {
-    stop("Load data failed, please run load_data('transcript_identifier') by hand before restarting the Shiny.")
-  }
-)
-
-phenotype_datasets <- UCSCXenaTools::XenaData %>%
-  dplyr::filter(Type == "clinicalMatrix") %>%
-  dplyr::pull(XenaDatasets)
-
-
-themes_list <- list(
-  "cowplot" = cowplot::theme_cowplot(),
-  "Light" = theme_light(),
-  "Minimal" = theme_minimal(),
-  "Classic" = theme_classic(),
-  "Gray" = theme_gray(),
-  "half_open" = cowplot::theme_half_open(),
-  "minimal_grid" = cowplot::theme_minimal_grid()
-)
-
-
-## 通路基因
-PW_meta <- load_data("tcga_PW_meta")
-PW_meta <- PW_meta %>% 
-  dplyr::arrange(Name) %>%
-  dplyr::mutate(size = purrr::map_int(Gene, function(x){
-    x_ids = strsplit(x, "/", fixed = TRUE)[[1]]
-    length(x_ids)
-  }), .before = 5) %>% 
-  dplyr::mutate(display = paste0(Name, " (", size, ")"), .before = 6)
-
-msigdbr_types <- data.frame(
-  gs_cat = c("H","C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", 
-             "C3", "C3", "C3", "C3", "C4", "C4", "C5", "C5", 
-             "C5", "C5", "C6", "C7", "C7", "C8"),
-  gs_subcat = c("","", "CGP", "CP", "CP:BIOCARTA", "CP:KEGG", 
-                "CP:PID", "CP:REACTOME", "CP:WIKIPATHWAYS", 
-                "MIR:MIRDB", "MIR:MIR_Legacy", "TFT:GTRD", 
-                "TFT:TFT_Legacy", "CGN", "CM", "GO:BP", 
-                "GO:CC", "GO:MF", "HPO", "", "IMMUNESIGDB", 
-                "VAX", "")
-)
-msigdbr_types = msigdbr_types %>% 
-  dplyr::mutate(gs_subcat2 = ifelse(gs_subcat=="",gs_cat,gs_subcat)) %>% 
-  dplyr::mutate(gs_subcat_label = paste0(gs_cat, "--",gs_subcat2))
-
-#   gs_cat   gs_subcat  gs_subcat2 gs_subcat_label
-# 1      H                       H            H--H
-# 2     C1                      C1          C1--C1
-# 3     C2         CGP         CGP         C2--CGP
-# 4     C2          CP          CP          C2--CP
-
-
-
-
-
-
-
-
-## TCGA/PCAWG/CCLE value & id for general analysis
-general_value_id = UCSCXenaShiny:::query_general_id()
-# id
-tcga_id_option = general_value_id[["id"]][[1]]
-pcawg_id_option = general_value_id[["id"]][[2]]
-ccle_id_option = general_value_id[["id"]][[3]]
-# value
-tcga_value_option = general_value_id[["value"]][[1]]
-tcga_index_value = tcga_value_option[["Tumor index"]]
-tcga_immune_value = tcga_value_option[["Immune Infiltration"]]
-tcga_pathway_value = tcga_value_option[["Pathway activity"]]
-tcga_phenotype_value = tcga_value_option[["Phenotype data"]]
-
-pcawg_value_option = general_value_id[["value"]][[2]]
-pcawg_index_value = pcawg_value_option[["Tumor index"]]
-pcawg_immune_value = pcawg_value_option[["Immune Infiltration"]]
-pcawg_pathway_value = pcawg_value_option[["Pathway activity"]]
-pcawg_phenotype_value = pcawg_value_option[["Phenotype data"]]
-
-ccle_value_option = general_value_id[["value"]][[3]]
-ccle_index_value = ccle_value_option[["Tumor index"]]
-ccle_phenotype_value = ccle_value_option[["Phenotype data"]]
-
-
-TIL_signatures = lapply(tcga_id_option$`Immune Infiltration`, function(x) {
-  x$all
-}) %>% reshape2::melt() %>% 
-  dplyr::mutate(x = paste0(value,"_",L1)) %>%
-  dplyr::pull(x)
-
-
-
-# Help → ID reference
-tcga_id_referrence = load_data("pancan_identifier_help")
-pcawg_id_referrence = load_data("pcawg_identifier")
-ccle_id_referrence = load_data("ccle_identifier")
-
-
-
-code_types = list("NT"= "NT (normal tissue)",
-          "TP"= "TP (primary tumor)",
-          "TR"= "TR (recurrent tumor)",
-          "TB"= "TB (blood derived tumor)",
-          "TAP"="TAP (additional primary)",
-          "TM"= "TM (metastatic tumor)",
-          "TAM"="TAM (additional metastatic)")
-
-# CCLE tissues for drug analysis
-# "ALL" means all tissues
-ccle_drug_related_tissues <- c(
-  "ALL", "prostate", "central_nervous_system", "urinary_tract", "haematopoietic_and_lymphoid_tissue",
-  "kidney", "thyroid", "soft_tissue", "skin", "salivary_gland",
-  "ovary", "lung", "bone", "endometrium", "pancreas", "breast",
-  "large_intestine", "upper_aerodigestive_tract", "autonomic_ganglia",
-  "stomach", "liver", "biliary_tract", "pleura", "oesophagus"
-)
-
-# Data summary
-Data_hubs_number <- length(unique(xena_table$Hub))
-Cohorts_number <- length(unique(xena_table$Cohort))
-Datasets_number <- length(unique(xena_table$`Dataset ID`))
-Samples_number <- "~2,000,000"
-Primary_sites_number <- "~37"
-Data_subtypes_number <- "~45"
-Xena_summary <- dplyr::group_by(xena_table, Hub) %>%
-  dplyr::summarise(
-    n_cohort = length(unique(.data$Cohort)),
-    n_dataset = length(unique(.data$`Dataset ID`)), .groups = "drop"
-  )
-
-# PCAWG project info
-pcawg_items = sort(unique(pcawg_info_fine$Project)) #30
-dcc_project_code_choices <- c(
-  "BLCA-US", "BRCA-US", "OV-AU", "PAEN-AU", "PRAD-CA", "PRAD-US", "RECA-EU", "SKCM-US", "STAD-US",
-  "THCA-US", "KIRP-US", "LIHC-US", "PRAD-UK", "LIRI-JP", "PBCA-DE", "CESC-US", "PACA-AU", "PACA-CA",
-  "LAML-KR", "COAD-US", "ESAD-UK", "LINC-JP", "LICA-FR", "CLLE-ES", "HNSC-US", "EOPC-DE", "BRCA-UK",
-  "BOCA-UK", "MALY-DE", "CMDI-UK", "BRCA-EU", "ORCA-IN", "BTCA-SG", "SARC-US", "KICH-US", "MELA-AU",
-  "DLBC-US", "GACA-CN", "PAEN-IT", "GBM-US", "KIRC-US", "LAML-US", "LGG-US", "LUAD-US", "LUSC-US",
-  "OV-US", "READ-US", "UCEC-US"
-)
-
-
-# Global color
-mycolor <- c(RColorBrewer::brewer.pal(12, "Paired"))
+#source(system.file("shinyapp/appdata.R", package = "UCSCXenaShiny"))
+
+appdata_path = path.expand(file.path(getOption("xena.cacheDir"), "appdata.RData"))
+message("Checking shiny app data from ", appdata_path, "...")
+if (!file.exists(appdata_path)) {
+  message("Processing and caching shiny app data...")
+  source(system.file("shinyapp/appdata.R", package = "UCSCXenaShiny"))
+  message("Done")
+} else {
+  load(appdata_path)
+  message("Cached processed data used.")
+}
 
 # Put modules here --------------------------------------------------------
 modules_path <- system.file("shinyapp", "modules", package = "UCSCXenaShiny", mustWork = TRUE)
@@ -429,6 +257,7 @@ ui <- tagList(
     ui.page_general_analysis(),
     ui.page_pancan_quick(),
     ui.page_pancan_tcga(),
+    ui.page_PharmacoGenomics(),
     # ui.page_pancan_pcawg(),
     # ui.page_pancan_ccle(),
     ui.page_download(),
@@ -445,11 +274,14 @@ ui <- tagList(
   )
 )
 
+
 # Server Part ---------------------------------------------------------------
 server <- function(input, output, session) {
   message("Shiny app run successfully! Enjoy it!\n")
   message("               --  Xena shiny team\n")
-
+  # Silence all warnings (warn = -1); the previous level is kept in storeWarn
+  storeWarn <- getOption("warn")
+  options(warn = -1) 
   # observe(print(input$navbar))
 
   # inst/shinyapp/server