Merge pull request #12 from prabhakarlab/dev

Implements smoothing and parallelisation
prabhakarlab · Sep 27, 2023 · 7ba2a8a · 7ba2a8a
2 parents 27c6da1 + 3215d6d
commit 7ba2a8a
Show file tree

Hide file tree

Showing 20 changed files with 816 additions and 321 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Banksy
 Title: Spatial transcriptomic clustering
-Version: 0.1.4
+Version: 0.1.5
 Authors@R: c(
     person(given = "Vipul",
            family = "Singhal",
@@ -26,7 +26,7 @@ Imports:
     uwot,
     dbscan,
     igraph,
-    leidenAlg,
+    leidenAlg (>= 1.1.0),
     mclust,
     ComplexHeatmap,
     circlize,
@@ -42,7 +42,9 @@ Imports:
     SummarizedExperiment,
     stats,
     utils,
-    progress
+    progress,
+    doParallel,
+    foreach
 License: file LICENSE
 Encoding: UTF-8
 URL: https://github.com/prabhakarlab/Banksy

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ export(NormalizeBanksy)
 export(RunBanksyPCA)
 export(RunBanksyUMAP)
 export(ScaleBanksy)
+export(SmoothLabels)
 export(SplitBanksy)
 export(SubsetBanksy)
 export(asBanksyObject)
@@ -48,6 +49,8 @@ importFrom(RcppHungarian,HungarianSolver)
 importFrom(SummarizedExperiment,assays)
 importFrom(SummarizedExperiment,colData)
 importFrom(circlize,colorRamp2)
+importFrom(data.table,.N)
+importFrom(data.table,.SD)
 importFrom(data.table,`:=`)
 importFrom(data.table,data.table)
 importFrom(data.table,key)
@@ -58,6 +61,11 @@ importFrom(data.table,setnames)
 importFrom(data.table,setorder)
 importFrom(dbscan,kNN)
 importFrom(dbscan,sNN)
+importFrom(doParallel,registerDoParallel)
+importFrom(doParallel,stopImplicitCluster)
+importFrom(foreach,`%do%`)
+importFrom(foreach,`%dopar%`)
+importFrom(foreach,foreach)
 importFrom(ggalluvial,StatStratum)
 importFrom(ggalluvial,geom_flow)
 importFrom(ggalluvial,geom_stratum)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,13 @@
 
+# Version 0.1.5
+
++ Implemented SmoothLabels for k-nearest neighbors cluster label smoothing
++ Parallel clustering for Leiden graph-based clustering
++ Version depedency on leidenAlg (>= 1.1.0) for compatibility with igraph (>= 1.5.0)
++ Seed setting for clustering
++ Neighborhood sampling for computing neighborhood feature matrices. See arguments 
+`sample_size`, `sample_renorm` and `seed` in function ComputeBanksy
+
 # Version 0.1.4
 
 + Implemented Azimuthal Gabor filters in ComputeBanksy, with the number of 

diff --git a/R/BanksyObject.R b/R/BanksyObject.R
@@ -81,7 +81,7 @@ BanksyObject <- function(own.expr = NULL, nbr.expr = NULL, harmonics = NULL,
         own.expr <- geneFilter(own.expr, genes.filter, min.cells.expressed)
 
         for (i in seq_len(nassays)) {
-            gcm <- own.expr[[i]]
+            gcm <- na_filter(own.expr[[i]], i)
             locs <- cell.locs[[i]]
             locs <- locs[match(colnames(gcm), rownames(locs)), ]
             if (!identical(colnames(gcm), rownames(locs))) {
@@ -108,6 +108,7 @@ BanksyObject <- function(own.expr = NULL, nbr.expr = NULL, harmonics = NULL,
         if (is.null(cell.locs)) stop('Provide cell location assay')
 
         own.expr <- as.matrix(own.expr)
+        own.expr <- na_filter(own.expr)
         own.expr <- own.expr[rowSums(own.expr > 0) >= min.cells.expressed, ]
 
         names(cell.locs) <- dimnames[seq_len(ncol(cell.locs))]
@@ -152,6 +153,82 @@ setValidity('BanksyObject', function(object) {
     return(check)
 })
 
+# Filter genes on init.
+geneFilter <- function(x, genes.filter, min.cells.expressed) {
+    ngenesBef <- vapply(x, function(x) dim(x)[1], FUN.VALUE = numeric(1))
+
+    if (genes.filter == 'union') {
+        all.genes <- Reduce(union, lapply(x, rownames))
+        x <- lapply(x, function(x) {
+            genes <- setdiff(all.genes, rownames(x))
+            if (length(genes) == 0) {
+                return(x)
+            } else {
+                append <- matrix(0,
+                                 nrow = length(genes),
+                                 ncol = ncol(x))
+                colnames(append) <- colnames(x)
+                rownames(append) <- genes
+                x <- rbind(x, append)
+                return(x)
+            }
+        })
+    } else if (genes.filter == 'intersect') {
+        common.genes <- Reduce(intersect, lapply(x, rownames))
+        x <- lapply(x, function(x)
+            x[rownames(x) %in% common.genes, ])
+
+        if (min.cells.expressed > 0) {
+            message('Filering genes expressed in less than ',
+                    min.cells.expressed,
+                    ' cells')
+            pass.genes <-
+                lapply(x, function(x)
+                    rownames(x)[rowSums(x > 0) >= min.cells.expressed])
+            pass.genes <- Reduce(intersect, pass.genes)
+            x <- lapply(x, function(x)
+                x[rownames(x) %in% pass.genes, ])
+            ngenesAft <-
+                vapply(x, function(x)
+                    dim(x)[1], FUN.VALUE = numeric(1))
+            filt <- ngenesBef - ngenesAft
+            for (i in seq_len(length(x))) {
+                message('Filtered ',
+                        filt[i],
+                        ' genes from dataset ',
+                        names(x)[i])
+            }
+
+        }
+    }
+
+    ## Harmonise gene name orderings
+    gene.names <- sort(rownames(x[[1]]))
+    x <- lapply(x, function(x) {
+        x[match(gene.names, rownames(x)), ]
+    })
+
+    return(x)
+}
+
+
+# Filter samples on init.
+na_filter = function(x, i=NULL) {
+    offenders = unique(which(is.na(x), arr.ind = TRUE)[,2])
+    if (length(offenders) == 0) {
+        return(x)
+    } else {
+        warn_msg = 'NAs in input matrix detected. Filtering out samples at indices '
+        warn_msg = paste0(warn_msg, paste(offenders, collapse = ', '))
+        if (!is.null(i)) {
+            warn_msg = paste0(warn_msg, ' for assay ', i)
+        }
+        warning(warn_msg)
+        return(x[,-offenders])
+    }
+}
+
+
 ## Getters ---------------------------------------------------------------------
 
 #' @param object BanksyObject