Skip to content

Commit 36399b8

Browse files
committed
BugFix in createFinalHaplotypTable
1 parent adb8dc6 commit 36399b8

File tree

1 file changed

+50
-34
lines changed

1 file changed

+50
-34
lines changed

R/callHaplotype.R

+50-34
Original file line numberDiff line numberDiff line change
@@ -114,19 +114,19 @@ callHaplotype <- function(x, detectability=1/100, minHaplotypCoverage=3, minRepl
114114
minReplicate <- min(dim(x)[2], minReplicate)
115115

116116
# remove haplotypes without reads
117-
x <- x[rowSums(x>0) > 0,,drop=F]
117+
x <- x[rowSums(x>0, na.rm=T) > 0,,drop=F]
118118

119119
# selected and remove filtered haplotype
120120
idx <- rownames(x) %in% defineBackground
121121
background <- x[idx,, drop=F]
122122
x <- x[!idx,, drop=F]
123-
123+
124124
# check for noise haplotype
125-
minCov <- colSums(x)*detectability
125+
minCov <- colSums(x, na.rm=T)*detectability
126126
minCov[minCov<minHaplotypCoverage] <- minHaplotypCoverage
127127
if(dim(x)[1]>0){
128-
noiseIdx <- rowSums(t(t(x)/minCov) >= 1) < minReplicate # only haplotypes present in minimum replicates
129-
lowCnt <- colSums(x[noiseIdx,,drop=F])
128+
noiseIdx <- rowSums(t(t(x)/minCov) >= 1, na.rm=T) < minReplicate # only haplotypes present in minimum replicates
129+
lowCnt <- colSums(x[noiseIdx,,drop=F], na.rm=T)
130130
x <- x[!noiseIdx,,drop=F]
131131
}else{
132132
lowCnt <- 0
@@ -160,7 +160,8 @@ createFinalHaplotypTable <- function(outputDir=outputDir, sampleTable=procReads,
160160
tab <- createContingencyTable(as.character(samTab$ReadFile), dereplicated=F, inputFormat="fastq", outputDir=outFreqFiles,
161161
sampleNames=as.character(samTab$SampleID), replicatNames=NULL,
162162
haplotypeUIDFile=NULL)
163-
#write.table(tab, file=file.path(outputDir, sprintf("contingencyTable_%s%s.txt", marker, postfix)), sep="\t")
163+
if( getOption("HaplotypR.devel"))
164+
write.table(tab, file=file.path(outputDir, sprintf("contingencyTable_%s%s.txt", marker, postfix)), sep="\t")
164165
fnAllSeq <- file.path(outFreqFiles, sprintf("allSequences_%s%s.fasta", marker, postfix))
165166
file.rename(file.path(outFreqFiles, "allHaplotypes.fa"), fnAllSeq)
166167
frqfile <- file.path(outFreqFiles, paste(prefix, "_hapFreq.fa", sep=""))
@@ -194,7 +195,8 @@ createFinalHaplotypTable <- function(outputDir=outputDir, sampleTable=procReads,
194195
levels(overviewHap$FinalHaplotype) <- c(levels(overviewHap$FinalHaplotype), names(snps))
195196
overviewHap$FinalHaplotype[idx] <- names(snps)[match(overviewHap$snps[idx], snps)]
196197
overviewHap <- as.data.frame(overviewHap)
197-
#write.table(overviewHap, file=file.path(outputDir, sprintf("HaplotypeOverviewTable_%s%s.txt", marker, postfix)), sep="\t")
198+
if( getOption("HaplotypR.devel"))
199+
write.table(overviewHap, file=file.path(outputDir, sprintf("HaplotypeOverviewTable_%s%s.txt", marker, postfix)), sep="\t")
198200

199201
# runCallHaplotype <- function(input, output, session, volumes){
200202

@@ -221,55 +223,69 @@ createFinalHaplotypTable <- function(outputDir=outputDir, sampleTable=procReads,
221223

222224
# set final haplotype names
223225
rownames(haplotypesSample) <- overviewHap[rownames(haplotypesSample), "FinalHaplotype"]
224-
#write.table(cbind(HaplotypNames=rownames(haplotypesSample),haplotypesSample),
225-
# file=file.path(outputDir, sprintf("rawHaplotypeTable_%s%s.txt", marker, postfix)), sep="\t", row.names=F, col.names=T)
226+
if( getOption("HaplotypR.devel"))
227+
write.table(cbind(HaplotypNames=rownames(haplotypesSample),haplotypesSample),
228+
file=file.path(outputDir, sprintf("rawHaplotypeTable_%s%s.txt", marker, postfix)), sep="\t", row.names=F, col.names=T)
229+
226230
haplotypesSample <- reclusterHaplotypesTable(haplotypesSample)
227-
#write.table(cbind(HaplotypNames=rownames(haplotypesSample),haplotypesSample),
228-
# file=file.path(outputDir, sprintf("reclusteredHaplotypeTable_%s%s.txt", marker, postfix)), sep="\t", row.names=F, col.names=T)
231+
if( getOption("HaplotypR.devel"))
232+
write.table(cbind(HaplotypNames=rownames(haplotypesSample),haplotypesSample),
233+
file=file.path(outputDir, sprintf("reclusteredHaplotypeTable_%s%s.txt", marker, postfix)), sep="\t", row.names=F, col.names=T)
229234

230235

231236
# Apply cut-off haplotype only in 1 sample
232-
haplotypesSample <- callHaplotype(haplotypesSample, minHaplotypCoverage=minCov, minReplicate=minOccHap,
237+
haplotypesSample <- callHaplotype(haplotypesSample, minHaplotypCoverage=minCov, minReplicate=minReplicate,
233238
detectability=detectionLimit, minSampleCoverage=1)
234239

235-
# write.table(cbind(HaplotypNames=rownames(haplotypesSample), haplotypesSample),
236-
# file=file.path(outputDir, sprintf("finalHaplotypeTable_Hcov%.0f_Scov%.0f_occ%i_sens%.4f_%s%s.txt",
237-
# minCov, minCovSample, minOccHap, detectionLimit, marker, postfix)),
238-
# sep="\t", row.names=F, col.names=T)
240+
if( getOption("HaplotypR.devel"))
241+
write.table(cbind(HaplotypNames=rownames(haplotypesSample), haplotypesSample),
242+
file=file.path(outputDir, sprintf("finalHaplotypeTable_Hcov%.0f_Scov%.0f_occ%i_sens%.4f_%s%s.txt",
243+
minCov, minCovSample, minReplicate, detectionLimit, marker, postfix)),
244+
sep="\t", row.names=F, col.names=T)
245+
239246

240247
# check replicates
241248
idx <- split(1:dim(haplotypesSample)[2], samTab$SampleName)
242249
lst <- lapply(idx, function(i){
243250
tab <- callHaplotype(haplotypesSample[,i, drop=F], minHaplotypCoverage=minCov,
244-
minReplicate=length(i), detectability=detectionLimit, minSampleCoverage=minCovSample,
251+
minReplicate=minReplicate, detectability=detectionLimit, minSampleCoverage=minCovSample,
245252
reportBackground=T)
246253
tab <- cbind(samTab[rep(i,each=dim(tab)[1]), c("SampleID","SampleName","MarkerID")],
247254
Haplotype=rownames(tab), Reads=as.integer(tab))
248255
colnames(tab) <- c("SampleID","SampleName","MarkerID","Haplotype","Reads")
249256
rownames(tab) <- NULL
250257
#check individual samples for chimera
251-
if(length(i)){
252-
tmpTab <- as.data.frame(tapply(tab$Reads, tab$Haplotype, sum))
253-
tmpTab$Haplotype <- rownames(tmpTab)
254-
colnames(tmpTab) <- c("Reads","Haplotype")
255-
hIdx <- grep(marker, tmpTab$Haplotype)
256-
}else{
257-
hIdx <- grep(marker, tab$Haplotype)
258-
}
259-
chim <- NULL
260-
if(length(hIdx)>2){
261-
chim <- flagChimera(tmpTab[hIdx,], overviewHap)
262-
}
263-
rm(tmpTab)
264-
tab$FlagChimera <- tab$Haplotype %in% chim
265-
return(tab)
258+
do.call(rbind, lapply(split(tab, tab$SampleID), function(tt){
259+
chim <- NULL
260+
hIdx <- grep(marker, tt$Haplotype)
261+
if(length(hIdx)>2){
262+
chim <- flagChimera(tt[hIdx,], overviewHap)
263+
}
264+
tt$FlagChimera <- tt$Haplotype %in% chim
265+
return(tt)
266+
}))
267+
# if(length(i)>1){
268+
# tmpTab <- as.data.frame(tapply(tab$Reads, tab$Haplotype, sum))
269+
# tmpTab$Haplotype <- rownames(tmpTab)
270+
# colnames(tmpTab) <- c("Reads","Haplotype")
271+
# hIdx <- grep(marker, tmpTab$Haplotype)
272+
# }else{
273+
# hIdx <- grep(marker, tab$Haplotype)
274+
# }
275+
# chim <- NULL
276+
# if(length(hIdx)>2){
277+
# browser()
278+
# chim <- flagChimera(tmpTab[hIdx,], overviewHap)
279+
# }
280+
# rm(tmpTab)
281+
# tab$FlagChimera <- tab$Haplotype %in% chim
282+
# return(tab)
266283
})
267284
lst <- do.call(rbind, lst)
268285

269-
270286
write.table(lst,
271287
file=file.path(outputDir, sprintf("finalHaplotypeList_Hcov%.0f_Scov%.0f_occ%i_sens%.4f_%s%s.txt",
272-
minCov, minCovSample, minOccHap, detectionLimit, marker, postfix)),
288+
minCov, minCovSample, minReplicate, detectionLimit, marker, postfix)),
273289
sep="\t", row.names=F, col.names=T)
274290
rownames(lst) <- NULL
275291
return(lst)

0 commit comments

Comments
 (0)