@@ -198,7 +198,8 @@ bool JunctionsExtractor::junction_qc(Junction &j1) {
198
198
}
199
199
200
200
// Add a junction to the junctions map
201
- // The read_count field is the number of reads supporting the junction.
201
+ // The read_count field is the number of reads supporting the junction,
202
+ // and reads is a set of the read names supporting the junction
202
203
int JunctionsExtractor::add_junction (Junction j1) {
203
204
// Check junction_qc
204
205
if (!junction_qc (j1 )) {
@@ -221,44 +222,46 @@ int JunctionsExtractor::add_junction(Junction j1) {
221
222
}
222
223
string key = j1 .chrom + string (" :" ) + start + " -" + end + " :" + strand_proxy;
223
224
224
- // Check if new junction
225
+ // add new junction
225
226
if (!junctions_.count (key)) {
226
227
j1 .name = get_new_junction_name ();
227
228
j1 .read_count = 1 ;
228
229
j1 .score = common::num_to_str (j1 .read_count );
229
- } else { // existing junction
230
- Junction j0 = junctions_[key];
231
-
230
+ junctions_[key] = j1 ;
231
+
232
+ } else { // update existing junction
233
+
232
234
if (output_barcodes_file_ != " NA" ){
233
- unordered_map<string, int >::const_iterator it = j0 . barcodes . find ( j1 . barcodes . begin ()-> first );
234
-
235
- if ( it != j0 .barcodes .end ()) { // barcode exists already
236
- j1 .barcodes = j0 . barcodes ;
237
- j1 .barcodes [it->first ]++;
235
+ // NOTE: Junction j1 will always have exactly one barcode,
236
+ // that of the current alignment; i.e. j1.barcodes = {<barcode>: 1}
237
+ auto it = junctions_[key] .barcodes .find ( j1 . barcodes . begin ()-> first );
238
+ if (it != junctions_[key] .barcodes . end ()) { // barcode exists already
239
+ junctions_[key] .barcodes [it->first ]++;
238
240
} else {
239
- // this block is where the slowness happens - not sure if it's the instantiation or the insertion
240
- // well, tried to get around instantion by just inserting into j0 but that made it like another 2x slower so I don't think it's that
241
- pair<string, int > tmp_barcode = *j1 .barcodes .begin ();
242
- j1 .barcodes = j0 .barcodes ;
243
- j1 .barcodes .insert (tmp_barcode);
241
+ junctions_[key].barcodes [it->first ] = 1 ;
244
242
}
245
243
}
246
- // increment read count
247
- j1 .read_count = j0 .read_count + 1 ;
248
- j1 .score = common::num_to_str (j1 .read_count );
249
- // Keep the same name
250
- j1 .name = j0 .name ;
244
+ // following barcodes convention, Junction j1 has one read,
245
+ // that of the current alignment; i.e. j1.reads = {<read_name>}
246
+ string read_name = *j1 .reads .begin ();
247
+ // avoid counting the same paired-end read twice
248
+ // if both segments overlap junction
249
+ if (!junctions_[key].reads .count (read_name)) {
250
+ junctions_[key].reads .insert (read_name);
251
+ junctions_[key].read_count ++;
252
+ junctions_[key].score = common::num_to_str (junctions_[key].read_count );
253
+ }
251
254
// Check if thick starts are any better
252
- if (j0 .thick_start < j1 .thick_start )
253
- j1 .thick_start = j0 .thick_start ;
254
- if (j0 .thick_end > j1 .thick_end )
255
- j1 .thick_end = j0 .thick_end ;
256
- // preserve min anchor information
257
- j1 .has_left_min_anchor = j1 .has_left_min_anchor || j0 .has_left_min_anchor ;
258
- j1 .has_right_min_anchor = j1 .has_right_min_anchor || j0 .has_right_min_anchor ;
255
+ if (j1 .thick_start < junctions_[key].thick_start )
256
+ junctions_[key].thick_start = j1 .thick_start ;
257
+ if (j1 .thick_end > junctions_[key].thick_end )
258
+ junctions_[key].thick_end = j1 .thick_end ;
259
+ // update min anchor information
260
+ junctions_[key].has_left_min_anchor =
261
+ junctions_[key].has_left_min_anchor || j1 .has_left_min_anchor ;
262
+ junctions_[key].has_right_min_anchor =
263
+ junctions_[key].has_right_min_anchor || j1 .has_right_min_anchor ;
259
264
}
260
- // Add junction and check anchor while printing.
261
- junctions_[key] = j1 ;
262
265
return 0 ;
263
266
}
264
267
@@ -422,8 +425,9 @@ int JunctionsExtractor::parse_alignment_into_junctions(bam_hdr_t *header, bam1_t
422
425
423
426
Junction j1 ;
424
427
j1 .chrom = chr;
428
+ j1 .thick_start = read_pos; // start pos of read
425
429
j1 .start = read_pos; // maintain start pos of junction
426
- j1 .thick_start = read_pos ;
430
+ j1 .reads . insert ( bam_get_qname (aln)) ;
427
431
string intron_motif;
428
432
429
433
if (output_barcodes_file_ != " NA" ){
0 commit comments