@@ -196,7 +196,8 @@ bool JunctionsExtractor::junction_qc(Junction &j1) {
196
196
}
197
197
198
198
// Add a junction to the junctions map
199
- // The read_count field is the number of reads supporting the junction.
199
+ // The read_count field is the number of reads supporting the junction,
200
+ // and reads is a set of the read names supporting the junction
200
201
int JunctionsExtractor::add_junction (Junction j1) {
201
202
// Check junction_qc
202
203
if (!junction_qc (j1 )) {
@@ -219,44 +220,46 @@ int JunctionsExtractor::add_junction(Junction j1) {
219
220
}
220
221
string key = j1 .chrom + string (" :" ) + start + " -" + end + " :" + strand_proxy;
221
222
222
- // Check if new junction
223
+ // add new junction
223
224
if (!junctions_.count (key)) {
224
225
j1 .name = get_new_junction_name ();
225
226
j1 .read_count = 1 ;
226
227
j1 .score = common::num_to_str (j1 .read_count );
227
- } else { // existing junction
228
- Junction j0 = junctions_[key];
229
-
228
+ junctions_[key] = j1 ;
229
+
230
+ } else { // update existing junction
231
+
230
232
if (output_barcodes_file_ != " NA" ){
231
- unordered_map<string, int >::const_iterator it = j0 . barcodes . find ( j1 . barcodes . begin ()-> first );
232
-
233
- if ( it != j0 .barcodes .end ()) { // barcode exists already
234
- j1 .barcodes = j0 . barcodes ;
235
- j1 .barcodes [it->first ]++;
233
+ // NOTE: Junction j1 will always have exactly one barcode,
234
+ // that of the current alignment; i.e. j1.barcodes = {<barcode>: 1}
235
+ auto it = junctions_[key] .barcodes .find ( j1 . barcodes . begin ()-> first );
236
+ if (it != junctions_[key] .barcodes . end ()) { // barcode exists already
237
+ junctions_[key] .barcodes [it->first ]++;
236
238
} else {
237
- // this block is where the slowness happens - not sure if it's the instantiation or the insertion
238
- // well, tried to get around instantion by just inserting into j0 but that made it like another 2x slower so I don't think it's that
239
- pair<string, int > tmp_barcode = *j1 .barcodes .begin ();
240
- j1 .barcodes = j0 .barcodes ;
241
- j1 .barcodes .insert (tmp_barcode);
239
+ junctions_[key].barcodes [it->first ] = 1 ;
242
240
}
243
241
}
244
- // increment read count
245
- j1 .read_count = j0 .read_count + 1 ;
246
- j1 .score = common::num_to_str (j1 .read_count );
247
- // Keep the same name
248
- j1 .name = j0 .name ;
242
+ // following barcodes convention, Junction j1 has one read,
243
+ // that of the current alignment; i.e. j1.reads = {<read_name>}
244
+ string read_name = *j1 .reads .begin ();
245
+ // avoid counting the same paired-end read twice
246
+ // if both segments overlap junction
247
+ if (!junctions_[key].reads .count (read_name)) {
248
+ junctions_[key].reads .insert (read_name);
249
+ junctions_[key].read_count ++;
250
+ junctions_[key].score = common::num_to_str (junctions_[key].read_count );
251
+ }
249
252
// Check if thick starts are any better
250
- if (j0 .thick_start < j1 .thick_start )
251
- j1 .thick_start = j0 .thick_start ;
252
- if (j0 .thick_end > j1 .thick_end )
253
- j1 .thick_end = j0 .thick_end ;
254
- // preserve min anchor information
255
- j1 .has_left_min_anchor = j1 .has_left_min_anchor || j0 .has_left_min_anchor ;
256
- j1 .has_right_min_anchor = j1 .has_right_min_anchor || j0 .has_right_min_anchor ;
253
+ if (j1 .thick_start < junctions_[key].thick_start )
254
+ junctions_[key].thick_start = j1 .thick_start ;
255
+ if (j1 .thick_end > junctions_[key].thick_end )
256
+ junctions_[key].thick_end = j1 .thick_end ;
257
+ // update min anchor information
258
+ junctions_[key].has_left_min_anchor =
259
+ junctions_[key].has_left_min_anchor || j1 .has_left_min_anchor ;
260
+ junctions_[key].has_right_min_anchor =
261
+ junctions_[key].has_right_min_anchor || j1 .has_right_min_anchor ;
257
262
}
258
- // Add junction and check anchor while printing.
259
- junctions_[key] = j1 ;
260
263
return 0 ;
261
264
}
262
265
@@ -420,8 +423,9 @@ int JunctionsExtractor::parse_alignment_into_junctions(bam_hdr_t *header, bam1_t
420
423
421
424
Junction j1 ;
422
425
j1 .chrom = chr;
426
+ j1 .thick_start = read_pos; // start pos of read
423
427
j1 .start = read_pos; // maintain start pos of junction
424
- j1 .thick_start = read_pos ;
428
+ j1 .reads . insert ( bam_get_qname (aln)) ;
425
429
string intron_motif;
426
430
427
431
if (output_barcodes_file_ != " NA" ){
0 commit comments