diff --git a/bin/characterization_BayesSpace.R b/bin/characterization_BayesSpace.R index d3760b2..3e553db 100644 --- a/bin/characterization_BayesSpace.R +++ b/bin/characterization_BayesSpace.R @@ -6,36 +6,36 @@ library(SingleCellExperiment) library(ggplot2) library(BayesSpace) library(Matrix) -library(reticulate) - - -# Parse command-line arguments -parser <- ArgumentParser() - -args <- parser$add_argument_group("Agruments", "required and optional arguments") - -args$add_argument("--filePath", help="Path to npz counts file", metavar="dir", required=TRUE) -args$add_argument("--nameX", default="st_adata_X.npz", help="Path to X", metavar="file", required=FALSE) -args$add_argument("--nameVar", default="st_adata.var.csv", help="Path to features metadata", metavar="file", required=FALSE) -args$add_argument("--nameObs", default="st_adata.obs.csv", help="Path to observation metadata", metavar="file", required=FALSE) -args$add_argument("--countsFactor", default=100, help="factor", metavar="factor", required=FALSE) -args$add_argument("--numberHVG", default=2000, help="factor", metavar="factor", required=FALSE) -args$add_argument("--numberPCs", default=7, help="factor", metavar="factor", required=FALSE) -args$add_argument("--minClusters", default=2, help="factor", metavar="factor", required=FALSE) -args$add_argument("--maxClusters", default=10, help="factor", metavar="factor", required=FALSE) -args$add_argument("--optimalQ", default=5, help="factor", metavar="factor", required=FALSE) -args$add_argument("--STplatform", default="Visium", help="Technology grid", metavar="factor", required=FALSE) - +library(reticulate) + + +# Parse command-line arguments +parser <- ArgumentParser() + +args <- parser$add_argument_group("Agruments", "required and optional arguments") + +args$add_argument("--filePath", help="Path to npz counts file", metavar="dir", required=TRUE) +args$add_argument("--nameX", default="st_adata_X.npz", help="Path to X", metavar="file", required=FALSE) +args$add_argument("--nameVar", default="st_adata.var.csv", help="Path to features metadata", metavar="file", required=FALSE) +args$add_argument("--nameObs", default="st_adata.obs.csv", help="Path to observation metadata", metavar="file", required=FALSE) +args$add_argument("--countsFactor", default=100, help="factor", metavar="factor", required=FALSE) +args$add_argument("--numberHVG", default=2000, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--numberPCs", default=7, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--minClusters", default=2, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--maxClusters", default=10, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--optimalQ", default=5, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--STplatform", default="Visium", help="Technology grid", metavar="factor", required=FALSE) + args$add_argument("--qtuneSaveName", default="st_bayes_qtune.png", help="file name", metavar="file", required=FALSE) args$add_argument("--bayesClustersName", default="st_bayes_clusters.png", help="file name", metavar="file", required=FALSE) args$add_argument("--bayesClustersEnhancedName", default="st_bayes_clusters_enhanced.png", help="file name", metavar="file", required=FALSE) args$add_argument("--bayesOriginalEnhancedFeatures", default="st_bayes_original_and_enhanced.png", help="file name", metavar="file", required=FALSE) args$add_argument("--bayesEnhancedMarkers", default="bayes_enhanced_markers.csv", help="file name", metavar="file", required=FALSE) args$add_argument("--bayesSubspotCoord", default="bayes_subspot_cluster_and_coord.csv", help="file name", metavar="file", required=FALSE) -args$add_argument("--bayesSpotCluster", default="bayes_spot_cluster.csv", help="file name", metavar="file", required=FALSE) - -args <- parser$parse_args() - +args$add_argument("--bayesSpotCluster", default="bayes_spot_cluster.csv", help="file name", metavar="file", required=FALSE) + +args <- parser$parse_args() + # #### Spatial clustering: # The latent cluster is modeled to depend on three parameters: cluster mean, fixed precision matrix and scaling factor.
@@ -52,8 +52,8 @@ args <- parser$parse_args() # #### Tuning number of clusters: # Elbow of negative pseudo-log-likelihood curve, that estimates how well the model fit the data for each number of clusters.
- - + + # Main script set.seed(123) np <- import("numpy") @@ -80,6 +80,13 @@ count.data <- np$load(paste0(normDataDir, args$nameX))[['arr_0']] * args$countsF colnames(count.data) <- st_obs_all$X rownames(count.data) <- rowData +print(args$countsFactor) +print(args$STplatform) +print(args$numberPCs) +print(args$numberHVG) +print(args$minClusters) + + dsp <- SingleCellExperiment(assays=list(counts=count.data), rowData=row_df, colData=col_df) dsp <- spatialPreprocess(dsp, platform=args$STplatform, n.PCs=args$numberPCs, n.HVGs=args$numberHVG, log.normalize=TRUE) @@ -92,7 +99,7 @@ dsp <- spatialCluster(dsp, q=args$optimalQ, platform=args$STplatform, d=args$num clusterPlot(dsp, palette=c("purple", "cyan", "blue", "yellow", "red"), color=NA) + theme_bw() + xlab("Column") + ylab("Row") + labs(fill="BayesSpace\ncluster", title="Spatial clustering") ggsave(paste0(normDataDir, args$bayesClustersName), dpi=600, scale=0.75, width=8, height=8, units="in") - + dsp.enhanced <- spatialEnhance(dsp, q=args$optimalQ, platform="Visium", d=args$numberPCs, model="t", gamma=2, jitter_prior=0.3, jitter_scale=3.5, nrep=1000, burn.in=100) clusterPlot(dsp.enhanced, palette=c("purple", "cyan", "blue", "yellow", "red"), color=NA) + theme_bw() + xlab("Column") + ylab("Row") + labs(fill="BayesSpace\ncluster", title="Spatial clustering") @@ -119,4 +126,4 @@ write.csv(df_enhanced_markers, file=paste0(args$filePath, args$bayesEnhancedMark df_spot_cluster <- subset(as.data.frame(dsp@colData), select=-c(imagecol, imagerow)) write.csv(df_spot_cluster, file=paste0(args$filePath, args$bayesSpotCluster)) -quit(status=0) +quit(status=0) diff --git a/bin/characterization_SPOTlight.R b/bin/characterization_SPOTlight.R index 0a2d29f..dfc75fc 100644 --- a/bin/characterization_SPOTlight.R +++ b/bin/characterization_SPOTlight.R @@ -34,17 +34,17 @@ args$add_argument("--SCnameObs", default="sc_adata.obs.csv", help="Path to obser args$add_argument("--fileh5", default="raw_feature_bc_matrix.h5", help="File HDF5", metavar="file", required=FALSE) # "filtered_feature_bc_matrix.h5" args$add_argument("--outsSubDir", default="raw_feature_bc_matrix/", help="dir", metavar="file", required=FALSE) -args$add_argument("--mtxGeneColumn", default=2, help="columns index", metavar="col", required=FALSE) -args$add_argument("--countsFactor", default=100, help="factor", metavar="factor", required=FALSE) +args$add_argument("--mtxGeneColumn", default=2, type="integer", help="columns index", metavar="col", required=FALSE) +args$add_argument("--countsFactor", default=100, type="integer", help="factor", metavar="factor", required=FALSE) -args$add_argument("--clusterResolution", default=0.3, help="factor", metavar="factor", required=FALSE) +args$add_argument("--clusterResolution", default=0.3, type="double", help="factor", metavar="factor", required=FALSE) -args$add_argument("--numberHVG", default=3000, help="factor", metavar="factor", required=FALSE) -args$add_argument("--numberCellsPerCelltype", default=100, help="factor", metavar="factor", required=FALSE) +args$add_argument("--numberHVG", default=3000, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--numberCellsPerCelltype", default=100, type="integer", help="factor", metavar="factor", required=FALSE) args$add_argument("--NMFsaveFile", default="SPOTlight_ls_mk_normed.rds", help="File to save NMF RDS", metavar="file", required=FALSE) args$add_argument("--SPOTlightScatterpiesName", default="SPOTlight_st_scatterpies.png", help="dir", metavar="file", required=FALSE) -args$add_argument("--SPOTlightScatterpiesSize", default=0.35, help="dir", metavar="file", required=FALSE) +args$add_argument("--SPOTlightScatterpiesSize", default=0.35, type="double", help="dir", metavar="file", required=FALSE) args$add_argument("--SPOTlightSCclustersName", default="SPOTlight_sc_clusters.png", help="dir", metavar="file", required=FALSE) args$add_argument("--SPOTlightTopicsName", default="SPOTlight_st_topic_profiles.png", help="dir", metavar="file", required=FALSE) args$add_argument("--SPOTlightFeaturesName", default="SPOTlight_st_prop.png", help="dir", metavar="file", required=FALSE) diff --git a/bin/characterization_STdeconvolve.R b/bin/characterization_STdeconvolve.R index 6e7535f..a5239b8 100644 --- a/bin/characterization_STdeconvolve.R +++ b/bin/characterization_STdeconvolve.R @@ -29,19 +29,19 @@ args$add_argument("--SCnameObs", default="sc_adata.obs.csv", help="Path to obser args$add_argument("--fileh5", default="raw_feature_bc_matrix.h5", help="File HDF5", metavar="file", required=FALSE) # "filtered_feature_bc_matrix.h5" args$add_argument("--outsSubDir", default="raw_feature_bc_matrix/", help="dir", metavar="file", required=FALSE) -args$add_argument("--mtxGeneColumn", default=2, help="columns index", metavar="col", required=FALSE) -args$add_argument("--countsFactor", default=100, help="factor", metavar="factor", required=FALSE) +args$add_argument("--mtxGeneColumn", default=2, type="integer", help="columns index", metavar="col", required=FALSE) +args$add_argument("--countsFactor", default=100, type="integer", help="factor", metavar="factor", required=FALSE) -args$add_argument("--corpusRemoveAbove", default=1.0, help="factor", metavar="factor", required=FALSE) -args$add_argument("--corpusRemoveBelow", default=0.05, help="factor", metavar="factor", required=FALSE) +args$add_argument("--corpusRemoveAbove", default=1.0, type="double", help="factor", metavar="factor", required=FALSE) +args$add_argument("--corpusRemoveBelow", default=0.05, type="double", help="factor", metavar="factor", required=FALSE) -args$add_argument("--LDAminTopics", default=8, help="factor", metavar="factor", required=FALSE) -args$add_argument("--LDAmaxTopics", default=9, help="factor", metavar="factor", required=FALSE) +args$add_argument("--LDAminTopics", default=8, type="integer", help="factor", metavar="factor", required=FALSE) +args$add_argument("--LDAmaxTopics", default=9, type="integer", help="factor", metavar="factor", required=FALSE) args$add_argument("--LDAsaveFile", default="STdeconvolve_optLDA.rds", help="File to save LDA RDS", metavar="file", required=FALSE) args$add_argument("--STdeconvolveScatterpiesName", default="STdeconvolve_st_scatterpies.png", help="dir", metavar="file", required=FALSE) -args$add_argument("--STdeconvolveScatterpiesSize", default=2.85, help="dir", metavar="file", required=FALSE) -args$add_argument("--STdeconvolveFeaturesSizeFactor", default=1.0, help="dir", metavar="file", required=FALSE) +args$add_argument("--STdeconvolveScatterpiesSize", default=2.85, type="double", help="dir", metavar="file", required=FALSE) +args$add_argument("--STdeconvolveFeaturesSizeFactor", default=1.0, type="double", help="dir", metavar="file", required=FALSE) args$add_argument("--STdeconvolveFeaturesName", default="STdeconvolve_st_prop.png", help="dir", metavar="file", required=FALSE) args$add_argument("--STdeconvolveCorrName", default="STdeconvolve_st_prop_corr.png", help="dir", metavar="file", required=FALSE) args$add_argument("--STdeconvolvePropNormName", default="STdeconvolve_prop_norm.csv", help="dir", metavar="file", required=FALSE) diff --git a/bin/stSpatialDE.py b/bin/stSpatialDE.py index 1f28f47..a63ef35 100644 --- a/bin/stSpatialDE.py +++ b/bin/stSpatialDE.py @@ -17,8 +17,8 @@ parser.add_argument('--fileName', metavar='file', type=str, default="st_adata_norm.h5ad", help='File name.') parser.add_argument('--saveFileName', metavar='file', type=str, default="stSpatialDE.csv", help='File name.') parser.add_argument('--savePlotName', metavar='plot', type=str, default="stSpatialDE.png", help='File name.') -parser.add_argument('--plotTopHVG', metavar='number', type=str, default=15, help='File name.') -parser.add_argument('--numberOfColumns', metavar='number', type=str, default=5, help='File name.') +parser.add_argument('--plotTopHVG', metavar='number', type=int, default=15, help='File name.') +parser.add_argument('--numberOfColumns', metavar='number', type=int, default=5, help='File name.') args = parser.parse_args() diff --git a/modules/local/tasks.nf b/modules/local/tasks.nf index ed76472..b5bd6e0 100644 --- a/modules/local/tasks.nf +++ b/modules/local/tasks.nf @@ -61,9 +61,9 @@ import groovy.json.JsonSlurper [ ! -d \${dname} ] && mkdir \${dname} - python $projectDir/bin/script_read_st_data.py --outsPath=${sample_info.st_data_dir} --saveFile=\${dname}/st_adata_raw.h5ad --countsFile=raw_feature_bc_matrix.h5 --npCountsOutputName=st_adata_counts_in_tissue.npz --minCounts=params.STload_minCounts --minCells=params.STload_minCells + python $projectDir/bin/script_read_st_data.py --outsPath=${sample_info.st_data_dir} --saveFile=\${dname}/st_adata_raw.h5ad --countsFile=raw_feature_bc_matrix.h5 --npCountsOutputName=st_adata_counts_in_tissue.npz --minCounts=$params.STload_minCounts --minCells=$params.STload_minCells - python $projectDir/bin/script_read_sc_data.py --outsPath=${sample_info.sc_data_dir} --saveFile=\${dname}/sc_adata_raw.h5ad --npCountsOutputName=sc_adata_counts.npz --minCounts=params.SCload_minCounts --minCells=params.SCload_minCells --minGenes=params.SCload_minGenes + python $projectDir/bin/script_read_sc_data.py --outsPath=${sample_info.sc_data_dir} --saveFile=\${dname}/sc_adata_raw.h5ad --npCountsOutputName=sc_adata_counts.npz --minCounts=$params.SCload_minCounts --minCells=$params.SCload_minCells --minGenes=$params.SCload_minGenes if [[ -s \${dname}/st_adata_raw.h5ad ]] && \ [[ -s \${dname}/sc_adata_raw.h5ad ]] && \ @@ -142,7 +142,7 @@ import groovy.json.JsonSlurper mitoFile=${outdir}/${sample_info.species}.MitoCarta2.0.txt - python $projectDir/bin/stPreprocess.py --filePath=\${dname}/ --npFactorsOutputName=st_adata_counts_in_tissue_factors.npz --rawAdata=st_adata_raw.h5ad --mitoFile=\$mitoFile --pltFigSize=params.STpreprocess_pltFigSize --minCounts=params.STpreprocess_minCounts --minGenes=params.STpreprocess_minGenes --minCells=params.STpreprocess_minCells --histplotQCmaxTotalCounts=params.STpreprocess_histplotQCmaxTotalCounts --histplotQCminGeneCounts=params.STpreprocess_histplotQCminGeneCounts --histplotQCbins=params.STpreprocess_histplotQCbins + python $projectDir/bin/stPreprocess.py --filePath=\${dname}/ --npFactorsOutputName=st_adata_counts_in_tissue_factors.npz --rawAdata=st_adata_raw.h5ad --mitoFile=\$mitoFile --pltFigSize=$params.STpreprocess_pltFigSize --minCounts=$params.STpreprocess_minCounts --minGenes=$params.STpreprocess_minGenes --minCells=$params.STpreprocess_minCells --histplotQCmaxTotalCounts=$params.STpreprocess_histplotQCmaxTotalCounts --histplotQCminGeneCounts=$params.STpreprocess_histplotQCminGeneCounts --histplotQCbins=$params.STpreprocess_histplotQCbins if [[ -s \${dname}/st_adata_norm.h5ad ]] && \ [[ -s \${dname}/st_adata_X.npz ]] && \ @@ -185,7 +185,7 @@ import groovy.json.JsonSlurper mitoFile=${outdir}/${sample_info.species}.MitoCarta2.0.txt - python $projectDir/bin/scPreprocess.py --filePath=\${dname}/ --npFactorsOutputName=sc_adata_counts_factors.npz --rawAdata=sc_adata_raw.h5ad --mitoFile=\$mitoFile --pltFigSize=params.SCpreprocess_pltFigSize --minCounts=params.SCpreprocess_minCounts --minGenes=params.SCpreprocess_minGenes --minCells=params.SCpreprocess_minCells --histplotQCmaxTotalCounts=params.SCpreprocess_histplotQCmaxTotalCounts --histplotQCminGeneCounts=params.SCpreprocess_histplotQCminGeneCounts --histplotQCbins=params.SCpreprocess_histplotQCbins + python $projectDir/bin/scPreprocess.py --filePath=\${dname}/ --npFactorsOutputName=sc_adata_counts_factors.npz --rawAdata=sc_adata_raw.h5ad --mitoFile=\$mitoFile --pltFigSize=$params.SCpreprocess_pltFigSize --minCounts=$params.SCpreprocess_minCounts --minGenes=$params.SCpreprocess_minGenes --minCells=$params.SCpreprocess_minCells --histplotQCmaxTotalCounts=$params.SCpreprocess_histplotQCmaxTotalCounts --histplotQCminGeneCounts=$params.SCpreprocess_histplotQCminGeneCounts --histplotQCbins=$params.SCpreprocess_histplotQCbins if [[ -s \${dname}/sc_adata_norm.h5ad ]] && \ [[ -s \${dname}/sc_adata_X.npz ]] && \ @@ -237,7 +237,7 @@ import groovy.json.JsonSlurper dname=${outdir}/\${sample_id} - Rscript $projectDir/bin/characterization_STdeconvolve.R --filePath=\${dname}/ --outsPath=${sample_info.st_data_dir} --mtxGeneColumn=params.STdeconvolve_mtxGeneColumn --countsFactor=params.STdeconvolve_countsFactor --corpusRemoveAbove=params.STdeconvolve_corpusRemoveAbove --corpusRemoveBelow=params.STdeconvolve_corpusRemoveBelow --LDAminTopics=params.STdeconvolve_LDAminTopics --LDAmaxTopics=params.STdeconvolve_LDAmaxTopics --STdeconvolveScatterpiesSize=params.STdeconvolve_ScatterpiesSize --STdeconvolveFeaturesSizeFactor=params.STdeconvolve_FeaturesSizeFactor + Rscript $projectDir/bin/characterization_STdeconvolve.R --filePath=\${dname}/ --outsPath=${sample_info.st_data_dir} --mtxGeneColumn=$params.STdeconvolve_mtxGeneColumn --countsFactor=$params.STdeconvolve_countsFactor --corpusRemoveAbove=$params.STdeconvolve_corpusRemoveAbove --corpusRemoveBelow=$params.STdeconvolve_corpusRemoveBelow --LDAminTopics=$params.STdeconvolve_LDAminTopics --LDAmaxTopics=$params.STdeconvolve_LDAmaxTopics --STdeconvolveScatterpiesSize=$params.STdeconvolve_ScatterpiesSize --STdeconvolveFeaturesSizeFactor=$params.STdeconvolve_FeaturesSizeFactor if [[ -s \${dname}/STdeconvolve_prop_norm.csv ]] && \ [[ -s \${dname}/STdeconvolve_beta_norm.csv ]] && \ @@ -281,7 +281,7 @@ import groovy.json.JsonSlurper dname=${outdir}/\${sample_id} - Rscript $projectDir/bin/characterization_SPOTlight.R --filePath=\${dname}/ --outsPath=${sample_info.st_data_dir} --mtxGeneColumn=params.SPOTlight_mtxGeneColumn --countsFactor=params.SPOTlight_countsFactor --clusterResolution=params.SPOTlight_clusterResolution --numberHVG=params.SPOTlight_numberHVG --numberCellsPerCelltype=params.SPOTlight_numberCellsPerCelltype --SPOTlightScatterpiesSize=params.SPOTlight_ScatterpiesSize + Rscript $projectDir/bin/characterization_SPOTlight.R --filePath=\${dname}/ --outsPath=${sample_info.st_data_dir} --mtxGeneColumn=$params.SPOTlight_mtxGeneColumn --countsFactor=$params.SPOTlight_countsFactor --clusterResolution=$params.SPOTlight_clusterResolution --numberHVG=$params.SPOTlight_numberHVG --numberCellsPerCelltype=$params.SPOTlight_numberCellsPerCelltype --SPOTlightScatterpiesSize=$params.SPOTlight_ScatterpiesSize if [[ -s \${dname}/SPOTlight_prop_norm.csv ]] && \ [[ -s \${dname}/SPOTlight_beta_norm.csv ]] && \ @@ -325,7 +325,7 @@ import groovy.json.JsonSlurper dname=${outdir}/\${sample_id} - Rscript $projectDir/bin/characterization_BayesSpace.R --filePath=\${dname}/ --numberHVG=params.BayesSpace_numberHVG --numberPCs=params.BayesSpace_numberPCs --minClusters=params.BayesSpace_minClusters --maxClusters=params.BayesSpace_maxClusters --optimalQ=params.BayesSpace_optimalQ --STplatform=params.BayesSpace_STplatform + Rscript $projectDir/bin/characterization_BayesSpace.R --filePath=\${dname}/ --numberHVG=$params.BayesSpace_numberHVG --numberPCs=$params.BayesSpace_numberPCs --minClusters=$params.BayesSpace_minClusters --maxClusters=$params.BayesSpace_maxClusters --optimalQ=$params.BayesSpace_optimalQ --STplatform=$params.BayesSpace_STplatform if [[ -s \${dname}/bayes_spot_cluster.csv ]] && \ [[ -s \${dname}/bayes_subspot_cluster_and_coord.csv ]] && \ @@ -366,7 +366,7 @@ import groovy.json.JsonSlurper dname=${outdir}/\${sample_id} - python $projectDir/bin/stSpatialDE.py --filePath=\${dname}/ --numberOfColumns=params.SpatialDE_numberOfColumns + python $projectDir/bin/stSpatialDE.py --filePath=\${dname}/ --numberOfColumns=$params.SpatialDE_numberOfColumns if [[ -s \${dname}/stSpatialDE.csv ]] then @@ -419,7 +419,7 @@ import groovy.json.JsonSlurper dname=${outdir}/\${sample_id} - python $projectDir/bin/stClusteringWorkflow.py --filePath=\${dname}/ --resolution=params.Clustering_resolution + python $projectDir/bin/stClusteringWorkflow.py --filePath=\${dname}/ --resolution=$params.Clustering_resolution if [[ -s \${dname}/st_adata_processed.h5ad ]] && \ [[ -s \${dname}/sc_adata_processed.h5ad ]]