@@ -202,7 +202,8 @@ bool JunctionsExtractor::junction_qc(Junction &j1) {
202
202
}
203
203
204
204
// Add a junction to the junctions map
205
- // The read_count field is the number of reads supporting the junction.
205
+ // The read_count field is the number of reads supporting the junction,
206
+ // and reads is a set of the read names supporting the junction
206
207
int JunctionsExtractor::add_junction (Junction j1) {
207
208
// Check junction_qc
208
209
if (!junction_qc (j1 )) {
@@ -225,44 +226,46 @@ int JunctionsExtractor::add_junction(Junction j1) {
225
226
}
226
227
string key = j1 .chrom + string (" :" ) + start + " -" + end + " :" + strand_proxy;
227
228
228
- // Check if new junction
229
+ // add new junction
229
230
if (!junctions_.count (key)) {
230
231
j1 .name = get_new_junction_name ();
231
232
j1 .read_count = 1 ;
232
233
j1 .score = common::num_to_str (j1 .read_count );
233
- } else { // existing junction
234
- Junction j0 = junctions_[key];
235
-
234
+ junctions_[key] = j1 ;
235
+
236
+ } else { // update existing junction
237
+
236
238
if (output_barcodes_file_ != " NA" ){
237
- unordered_map<string, int >::const_iterator it = j0 . barcodes . find ( j1 . barcodes . begin ()-> first );
238
-
239
- if ( it != j0 .barcodes .end ()) { // barcode exists already
240
- j1 .barcodes = j0 . barcodes ;
241
- j1 .barcodes [it->first ]++;
239
+ // NOTE: Junction j1 will always have exactly one barcode,
240
+ // that of the current alignment; i.e. j1.barcodes = {<barcode>: 1}
241
+ auto it = junctions_[key] .barcodes .find ( j1 . barcodes . begin ()-> first );
242
+ if (it != junctions_[key] .barcodes . end ()) { // barcode exists already
243
+ junctions_[key] .barcodes [it->first ]++;
242
244
} else {
243
- // this block is where the slowness happens - not sure if it's the instantiation or the insertion
244
- // well, tried to get around instantion by just inserting into j0 but that made it like another 2x slower so I don't think it's that
245
- pair<string, int > tmp_barcode = *j1 .barcodes .begin ();
246
- j1 .barcodes = j0 .barcodes ;
247
- j1 .barcodes .insert (tmp_barcode);
245
+ junctions_[key].barcodes [it->first ] = 1 ;
248
246
}
249
247
}
250
- // increment read count
251
- j1 .read_count = j0 .read_count + 1 ;
252
- j1 .score = common::num_to_str (j1 .read_count );
253
- // Keep the same name
254
- j1 .name = j0 .name ;
248
+ // following barcodes convention, Junction j1 has one read,
249
+ // that of the current alignment; i.e. j1.reads = {<read_name>}
250
+ string read_name = *j1 .reads .begin ();
251
+ // avoid counting the same paired-end read twice
252
+ // if both segments overlap junction
253
+ if (!junctions_[key].reads .count (read_name)) {
254
+ junctions_[key].reads .insert (read_name);
255
+ junctions_[key].read_count ++;
256
+ junctions_[key].score = common::num_to_str (junctions_[key].read_count );
257
+ }
255
258
// Check if thick starts are any better
256
- if (j0 .thick_start < j1 .thick_start )
257
- j1 .thick_start = j0 .thick_start ;
258
- if (j0 .thick_end > j1 .thick_end )
259
- j1 .thick_end = j0 .thick_end ;
260
- // preserve min anchor information
261
- j1 .has_left_min_anchor = j1 .has_left_min_anchor || j0 .has_left_min_anchor ;
262
- j1 .has_right_min_anchor = j1 .has_right_min_anchor || j0 .has_right_min_anchor ;
259
+ if (j1 .thick_start < junctions_[key].thick_start )
260
+ junctions_[key].thick_start = j1 .thick_start ;
261
+ if (j1 .thick_end > junctions_[key].thick_end )
262
+ junctions_[key].thick_end = j1 .thick_end ;
263
+ // update min anchor information
264
+ junctions_[key].has_left_min_anchor =
265
+ junctions_[key].has_left_min_anchor || j1 .has_left_min_anchor ;
266
+ junctions_[key].has_right_min_anchor =
267
+ junctions_[key].has_right_min_anchor || j1 .has_right_min_anchor ;
263
268
}
264
- // Add junction and check anchor while printing.
265
- junctions_[key] = j1 ;
266
269
return 0 ;
267
270
}
268
271
@@ -426,8 +429,9 @@ int JunctionsExtractor::parse_alignment_into_junctions(bam_hdr_t *header, bam1_t
426
429
427
430
Junction j1 ;
428
431
j1 .chrom = chr;
432
+ j1 .thick_start = read_pos; // start pos of read
429
433
j1 .start = read_pos; // maintain start pos of junction
430
- j1 .thick_start = read_pos ;
434
+ j1 .reads . insert ( bam_get_qname (aln)) ;
431
435
string intron_motif;
432
436
433
437
if (output_barcodes_file_ != " NA" ){
0 commit comments