@@ -114,19 +114,19 @@ callHaplotype <- function(x, detectability=1/100, minHaplotypCoverage=3, minRepl
114
114
minReplicate <- min(dim(x )[2 ], minReplicate )
115
115
116
116
# remove haplotypes without reads
117
- x <- x [rowSums(x > 0 ) > 0 ,,drop = F ]
117
+ x <- x [rowSums(x > 0 , na.rm = T ) > 0 ,,drop = F ]
118
118
119
119
# selected and remove filtered haplotype
120
120
idx <- rownames(x ) %in% defineBackground
121
121
background <- x [idx ,, drop = F ]
122
122
x <- x [! idx ,, drop = F ]
123
-
123
+
124
124
# check for noise haplotype
125
- minCov <- colSums(x )* detectability
125
+ minCov <- colSums(x , na.rm = T )* detectability
126
126
minCov [minCov < minHaplotypCoverage ] <- minHaplotypCoverage
127
127
if (dim(x )[1 ]> 0 ){
128
- noiseIdx <- rowSums(t(t(x )/ minCov ) > = 1 ) < minReplicate # only haplotypes present in minimum replicates
129
- lowCnt <- colSums(x [noiseIdx ,,drop = F ])
128
+ noiseIdx <- rowSums(t(t(x )/ minCov ) > = 1 , na.rm = T ) < minReplicate # only haplotypes present in minimum replicates
129
+ lowCnt <- colSums(x [noiseIdx ,,drop = F ], na.rm = T )
130
130
x <- x [! noiseIdx ,,drop = F ]
131
131
}else {
132
132
lowCnt <- 0
@@ -160,7 +160,8 @@ createFinalHaplotypTable <- function(outputDir=outputDir, sampleTable=procReads,
160
160
tab <- createContingencyTable(as.character(samTab $ ReadFile ), dereplicated = F , inputFormat = " fastq" , outputDir = outFreqFiles ,
161
161
sampleNames = as.character(samTab $ SampleID ), replicatNames = NULL ,
162
162
haplotypeUIDFile = NULL )
163
- # write.table(tab, file=file.path(outputDir, sprintf("contingencyTable_%s%s.txt", marker, postfix)), sep="\t")
163
+ if ( getOption(" HaplotypR.devel" ))
164
+ write.table(tab , file = file.path(outputDir , sprintf(" contingencyTable_%s%s.txt" , marker , postfix )), sep = " \t " )
164
165
fnAllSeq <- file.path(outFreqFiles , sprintf(" allSequences_%s%s.fasta" , marker , postfix ))
165
166
file.rename(file.path(outFreqFiles , " allHaplotypes.fa" ), fnAllSeq )
166
167
frqfile <- file.path(outFreqFiles , paste(prefix , " _hapFreq.fa" , sep = " " ))
@@ -194,7 +195,8 @@ createFinalHaplotypTable <- function(outputDir=outputDir, sampleTable=procReads,
194
195
levels(overviewHap $ FinalHaplotype ) <- c(levels(overviewHap $ FinalHaplotype ), names(snps ))
195
196
overviewHap $ FinalHaplotype [idx ] <- names(snps )[match(overviewHap $ snps [idx ], snps )]
196
197
overviewHap <- as.data.frame(overviewHap )
197
- # write.table(overviewHap, file=file.path(outputDir, sprintf("HaplotypeOverviewTable_%s%s.txt", marker, postfix)), sep="\t")
198
+ if ( getOption(" HaplotypR.devel" ))
199
+ write.table(overviewHap , file = file.path(outputDir , sprintf(" HaplotypeOverviewTable_%s%s.txt" , marker , postfix )), sep = " \t " )
198
200
199
201
# runCallHaplotype <- function(input, output, session, volumes){
200
202
@@ -221,55 +223,69 @@ createFinalHaplotypTable <- function(outputDir=outputDir, sampleTable=procReads,
221
223
222
224
# set final haplotype names
223
225
rownames(haplotypesSample ) <- overviewHap [rownames(haplotypesSample ), " FinalHaplotype" ]
224
- # write.table(cbind(HaplotypNames=rownames(haplotypesSample),haplotypesSample),
225
- # file=file.path(outputDir, sprintf("rawHaplotypeTable_%s%s.txt", marker, postfix)), sep="\t", row.names=F, col.names=T)
226
+ if ( getOption(" HaplotypR.devel" ))
227
+ write.table(cbind(HaplotypNames = rownames(haplotypesSample ),haplotypesSample ),
228
+ file = file.path(outputDir , sprintf(" rawHaplotypeTable_%s%s.txt" , marker , postfix )), sep = " \t " , row.names = F , col.names = T )
229
+
226
230
haplotypesSample <- reclusterHaplotypesTable(haplotypesSample )
227
- # write.table(cbind(HaplotypNames=rownames(haplotypesSample),haplotypesSample),
228
- # file=file.path(outputDir, sprintf("reclusteredHaplotypeTable_%s%s.txt", marker, postfix)), sep="\t", row.names=F, col.names=T)
231
+ if ( getOption(" HaplotypR.devel" ))
232
+ write.table(cbind(HaplotypNames = rownames(haplotypesSample ),haplotypesSample ),
233
+ file = file.path(outputDir , sprintf(" reclusteredHaplotypeTable_%s%s.txt" , marker , postfix )), sep = " \t " , row.names = F , col.names = T )
229
234
230
235
231
236
# Apply cut-off haplotype only in 1 sample
232
- haplotypesSample <- callHaplotype(haplotypesSample , minHaplotypCoverage = minCov , minReplicate = minOccHap ,
237
+ haplotypesSample <- callHaplotype(haplotypesSample , minHaplotypCoverage = minCov , minReplicate = minReplicate ,
233
238
detectability = detectionLimit , minSampleCoverage = 1 )
234
239
235
- # write.table(cbind(HaplotypNames=rownames(haplotypesSample), haplotypesSample),
236
- # file=file.path(outputDir, sprintf("finalHaplotypeTable_Hcov%.0f_Scov%.0f_occ%i_sens%.4f_%s%s.txt",
237
- # minCov, minCovSample, minOccHap, detectionLimit, marker, postfix)),
238
- # sep="\t", row.names=F, col.names=T)
240
+ if ( getOption(" HaplotypR.devel" ))
241
+ write.table(cbind(HaplotypNames = rownames(haplotypesSample ), haplotypesSample ),
242
+ file = file.path(outputDir , sprintf(" finalHaplotypeTable_Hcov%.0f_Scov%.0f_occ%i_sens%.4f_%s%s.txt" ,
243
+ minCov , minCovSample , minReplicate , detectionLimit , marker , postfix )),
244
+ sep = " \t " , row.names = F , col.names = T )
245
+
239
246
240
247
# check replicates
241
248
idx <- split(1 : dim(haplotypesSample )[2 ], samTab $ SampleName )
242
249
lst <- lapply(idx , function (i ){
243
250
tab <- callHaplotype(haplotypesSample [,i , drop = F ], minHaplotypCoverage = minCov ,
244
- minReplicate = length( i ) , detectability = detectionLimit , minSampleCoverage = minCovSample ,
251
+ minReplicate = minReplicate , detectability = detectionLimit , minSampleCoverage = minCovSample ,
245
252
reportBackground = T )
246
253
tab <- cbind(samTab [rep(i ,each = dim(tab )[1 ]), c(" SampleID" ," SampleName" ," MarkerID" )],
247
254
Haplotype = rownames(tab ), Reads = as.integer(tab ))
248
255
colnames(tab ) <- c(" SampleID" ," SampleName" ," MarkerID" ," Haplotype" ," Reads" )
249
256
rownames(tab ) <- NULL
250
257
# check individual samples for chimera
251
- if (length(i )){
252
- tmpTab <- as.data.frame(tapply(tab $ Reads , tab $ Haplotype , sum ))
253
- tmpTab $ Haplotype <- rownames(tmpTab )
254
- colnames(tmpTab ) <- c(" Reads" ," Haplotype" )
255
- hIdx <- grep(marker , tmpTab $ Haplotype )
256
- }else {
257
- hIdx <- grep(marker , tab $ Haplotype )
258
- }
259
- chim <- NULL
260
- if (length(hIdx )> 2 ){
261
- chim <- flagChimera(tmpTab [hIdx ,], overviewHap )
262
- }
263
- rm(tmpTab )
264
- tab $ FlagChimera <- tab $ Haplotype %in% chim
265
- return (tab )
258
+ do.call(rbind , lapply(split(tab , tab $ SampleID ), function (tt ){
259
+ chim <- NULL
260
+ hIdx <- grep(marker , tt $ Haplotype )
261
+ if (length(hIdx )> 2 ){
262
+ chim <- flagChimera(tt [hIdx ,], overviewHap )
263
+ }
264
+ tt $ FlagChimera <- tt $ Haplotype %in% chim
265
+ return (tt )
266
+ }))
267
+ # if(length(i)>1){
268
+ # tmpTab <- as.data.frame(tapply(tab$Reads, tab$Haplotype, sum))
269
+ # tmpTab$Haplotype <- rownames(tmpTab)
270
+ # colnames(tmpTab) <- c("Reads","Haplotype")
271
+ # hIdx <- grep(marker, tmpTab$Haplotype)
272
+ # }else{
273
+ # hIdx <- grep(marker, tab$Haplotype)
274
+ # }
275
+ # chim <- NULL
276
+ # if(length(hIdx)>2){
277
+ # browser()
278
+ # chim <- flagChimera(tmpTab[hIdx,], overviewHap)
279
+ # }
280
+ # rm(tmpTab)
281
+ # tab$FlagChimera <- tab$Haplotype %in% chim
282
+ # return(tab)
266
283
})
267
284
lst <- do.call(rbind , lst )
268
285
269
-
270
286
write.table(lst ,
271
287
file = file.path(outputDir , sprintf(" finalHaplotypeList_Hcov%.0f_Scov%.0f_occ%i_sens%.4f_%s%s.txt" ,
272
- minCov , minCovSample , minOccHap , detectionLimit , marker , postfix )),
288
+ minCov , minCovSample , minReplicate , detectionLimit , marker , postfix )),
273
289
sep = " \t " , row.names = F , col.names = T )
274
290
rownames(lst ) <- NULL
275
291
return (lst )
0 commit comments