From 1b35122c95606b595172fd477e1bbca5d5390ef5 Mon Sep 17 00:00:00 2001 From: jfsanchez Date: Thu, 28 Mar 2019 12:23:47 +0100 Subject: [PATCH] Raw/clean length or coverage in one file --- scripts/R/plot-covg-hist_raw-clean.R | 83 ++++++++++++++++++++++++++ scripts/R/plot-length-hist_raw-clean.R | 40 +++++++++++++ 2 files changed, 123 insertions(+) create mode 100755 scripts/R/plot-covg-hist_raw-clean.R create mode 100755 scripts/R/plot-length-hist_raw-clean.R diff --git a/scripts/R/plot-covg-hist_raw-clean.R b/scripts/R/plot-covg-hist_raw-clean.R new file mode 100755 index 00000000..5fed97c3 --- /dev/null +++ b/scripts/R/plot-covg-hist_raw-clean.R @@ -0,0 +1,83 @@ +#!/usr/bin/env Rscript --vanilla +## modified by: JFSanchezHerrero: https://github.com/JFsanchezherrero +## 28 March, 2019 + +# Plot coverage histograms generated by e.g. 'mccortex31 clean --covg-before out.csv ...' +# input csv should have the columns: 'Covg' and 'NumUnitigs' +# +args <- commandArgs(trailingOnly=TRUE) +if(length(args) < 2 || length(args) > 5) { + stop(paste("Usage: Rscript --vanilla plot-covg-hist.R [ []]", + " raw_covg.csv - raw coverage CSV file with 'Covg', 'NumUnitigs' headings", + " clean_covg.csv - clean coverage CSV file with 'Covg', 'NumUnitigs' headings", + " out.pdf - output pdf file", + " cutoff - cleaning cutoff used [optional]", + " kcov - kmer coverage of sample [optional]", + "", + sep="\n")) +} +cutoff <- 0 +kcov <- 0 + +input_raw_csv <- args[1] +input_clean_csv <- args[2] +output_pdf <- args[3] +if(length(args) >= 4) { cutoff <- as.numeric(args[4]) } +if(length(args) >= 5) { kcov <- as.numeric(args[5]) } + +library('ggplot2') +library('gridExtra') + +read_file_and_plot = function(file2read, colourGiven, name) { + d <- read.csv(file=file2read,sep=',',as.is=T) + ymax <- max(d[,'NumKmers'],d[,'NumUnitigs']) + cutline <- data.frame(x=as.numeric(c(cutoff,cutoff)), y=as.numeric(c(0,ymax))) + kcovline <- data.frame(x=as.numeric(c(kcov,kcov)), y=as.numeric(c(0,ymax))) + + p <- ggplot() + + geom_bar(data=d, stat="identity", aes(x=Covg,y=NumKmers,fill=FALSE), colour=colourGiven) + + guides(fill=FALSE) + + xlab("Coverage (X)") + + ylab("Number of kmers") + + ggtitle(name) + + xlim(0,500) + + if(cutoff > 0) { + cutline[,'y'] = c(0, max(d[,'NumKmers'])) + p <- p + geom_line(data=cutline, aes(x=x,y=y), colour="black") + } + + if(kcov > 0) { + kcovline[,'y'] = c(0, max(d[,'NumKmers'])) + p <- p + geom_line(data=kcovline, aes(x=x,y=y), colour="black", linetype="dashed") + } + + q <- ggplot() + + geom_bar(data=d, stat="identity", aes(x=Covg,y=NumUnitigs,fill=FALSE), colour=colourGiven) + + guides(fill=FALSE) + + xlab("Coverage (X)") + + ylab("Number of unitigs") + + ggtitle(name) + + xlim(0,500) + + if(cutoff > 0) { + cutline[,'y'] = c(0, max(d[,'NumUnitigs'])) + q <- q + geom_line(data=cutline, aes(x=x,y=y), colour="black") + } + + if(kcov > 0) { + kcovline[,'y'] = c(0, max(d[,'NumUnitigs'])) + q <- q + geom_line(data=kcovline, aes(x=x,y=y), colour="black", linetype="dashed") + } + + list2return <- list("NumKmers" = p, "NumUnitigs" = q) + return(list2return) +} + +## plot raw vs clean +raw <- read_file_and_plot(file2read = input_raw_csv, colourGiven = "firebrick", name="Raw") +clean <- read_file_and_plot(file2read = input_clean_csv, colourGiven = "blue", name = "Clean") + +pdf(output_pdf,width=12,height=6) +grid.arrange(raw$NumKmers, raw$NumUnitigs, clean$NumKmers, clean$NumUnitigs, ncol=2, left="Kmer coverage", right="Unitig Median Coverage") +dev.off() diff --git a/scripts/R/plot-length-hist_raw-clean.R b/scripts/R/plot-length-hist_raw-clean.R new file mode 100755 index 00000000..bda1aee0 --- /dev/null +++ b/scripts/R/plot-length-hist_raw-clean.R @@ -0,0 +1,40 @@ +#!/usr/bin/env Rscript --vanilla +## modified by: JFSanchezHerrero: https://github.com/JFsanchezherrero +## 28 March, 2019 + +# Plot coverage histograms generated by e.g. 'mccortex31 clean --length-before out.csv ...' +# input csv should have the columns: 'bp' and 'Count' +# +args <- commandArgs(trailingOnly=TRUE) +if(length(args) != 3) { + stop("Usage: Rscript --vanilla plot-hist-hist.R \n") +} + +raw_input_csv = args[1] +clean_input_csv = args[2] +output_pdf = args[3] + +library('ggplot2') +library('gridExtra') + +plot_length <- function(fileGiven, colourGiven, titleGiven) { + + d=read.csv(file=fileGiven, sep=',',as.is=T) + + p <- ggplot(data=d, aes(bp, Count)) + + geom_bar(stat="identity", color=colourGiven, fill=colourGiven) + + scale_y_log10() + + xlab("Untig length (bases)") + + ylab("Number of unitigs") + + ggtitle(titleGiven) + + return(list("plot" = p)) + +} + +raw <- plot_length(fileGiven = raw_input_csv, colourGiven = "seagreen", titleGiven = "Raw Unitig Length") +clean <- plot_length(fileGiven = clean_input_csv , colourGiven = "red", titleGiven = "Clean Unitig Length") + +pdf(output_pdf,width=12,height=6) +grid.arrange(raw$plot, clean$plot, ncol=2) +dev.off() \ No newline at end of file