Skip to content

Commit bd7df5a

Browse files
committed
feat: added per-read anchor requirement to junction extract
- closes griffithlab#186, reads now only 'support' a junction if they have at least a given minimum anchor length, supplied with the '-A' flag
1 parent e82f1e4 commit bd7df5a

File tree

4 files changed

+31
-7
lines changed

4 files changed

+31
-7
lines changed

src/cis-splice-effects/cis_splice_effects_identifier.cc

+8-2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ void CisSpliceEffectsIdentifier::usage(ostream& out) {
4444
out << "\t\t" << "-t STR\tTag used in bam to label strand. [XS]" << endl;
4545
out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
4646
<< "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
47+
out << "\t\t" << "-A INT\tMinimum read anchor length. Reads which satisfy a minimum \n"
48+
<< "\t\t\t " << "anchor length on both sides 'support' a junction. [5]" << endl;
4749
out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
4850
out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
4951
out << "\t\t" << "-f INT\tOnly use alignments where all flag bits set here are set. [0]" << endl;
@@ -116,7 +118,7 @@ void CisSpliceEffectsIdentifier::parse_options(int argc, char* argv[]) {
116118
optind = 1; //Reset before parsing again.
117119
stringstream help_ss;
118120
char c;
119-
while((c = getopt(argc, argv, "o:w:v:j:e:Ei:ISht:s:a:m:M:f:F:q:b:C")) != -1) {
121+
while((c = getopt(argc, argv, "o:w:v:j:e:Ei:ISht:s:a:A:m:M:f:F:q:b:C")) != -1) {
120122
switch(c) {
121123
case 'o':
122124
output_file_ = string(optarg);
@@ -167,6 +169,9 @@ void CisSpliceEffectsIdentifier::parse_options(int argc, char* argv[]) {
167169
case 'a':
168170
min_anchor_length_ = atoi(optarg);
169171
break;
172+
case 'A':
173+
min_read_anchor_length_ = atoi(optarg);
174+
break;
170175
case 'm':
171176
min_intron_length_ = atoi(optarg);
172177
break;
@@ -298,7 +303,8 @@ void CisSpliceEffectsIdentifier::identify() {
298303
ref_to_pass = "NA";
299304
}
300305
JunctionsExtractor je1(bam_, variant_region, strandness_,
301-
strand_tag_, min_anchor_length_, min_intron_length_, max_intron_length_,
306+
strand_tag_, min_anchor_length_, min_read_anchor_length_,
307+
min_intron_length_, max_intron_length_,
302308
filter_flags_, require_flags_, min_map_qual_, ref_to_pass);
303309
je1.identify_junctions_from_BAM();
304310
vector<Junction> junctions = je1.get_all_junctions();

src/cis-splice-effects/cis_splice_effects_identifier.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,10 @@ class CisSpliceEffectsIdentifier {
8787
//tag used in BAM to denote strand, default "XS"
8888
string strand_tag_;
8989
//Minimum anchor length for junctions
90-
//Junctions need at least this many bp overlap
91-
// on both ends.
90+
//Junctions need at least this many bp overlap on both ends.
9291
uint32_t min_anchor_length_;
92+
//Reads need at least this many bp overlap on both ends to support junction.
93+
uint32_t min_read_anchor_length_;
9394
//Minimum length of an intron, i.e min junction width
9495
uint32_t min_intron_length_;
9596
//Maximum length of an intron, i.e max junction width
@@ -118,6 +119,7 @@ class CisSpliceEffectsIdentifier {
118119
strandness_(-1),
119120
strand_tag_("XS"),
120121
min_anchor_length_(8),
122+
min_read_anchor_length_(5),
121123
min_intron_length_(70),
122124
max_intron_length_(500000),
123125
filter_flags_(0),

src/junctions/junctions_extractor.cc

+13-2
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,17 @@ int JunctionsExtractor::parse_options(int argc, char *argv[]) {
4343
optind = 1; //Reset before parsing again.
4444
int c;
4545
stringstream help_ss;
46-
while((c = getopt(argc, argv, "ha:m:M:f:F:q:o:r:t:s:b:")) != -1) {
46+
while((c = getopt(argc, argv, "ha:A:m:M:f:F:q:o:r:t:s:b:")) != -1) {
4747
switch(c) {
4848
case 'h':
4949
usage(help_ss);
5050
throw common::cmdline_help_exception(help_ss.str());
5151
case 'a':
5252
min_anchor_length_ = atoi(optarg);
5353
break;
54+
case 'A':
55+
min_read_anchor_length_ = atoi(optarg);
56+
break;
5457
case 'm':
5558
min_intron_length_ = atoi(optarg);
5659
break;
@@ -119,6 +122,7 @@ int JunctionsExtractor::parse_options(int argc, char *argv[]) {
119122
}
120123

121124
cerr << "Minimum junction anchor length: " << min_anchor_length_ << endl;
125+
cerr << "Minimum read anchor length: " << min_read_anchor_length_ << endl;
122126
cerr << "Minimum intron length: " << min_intron_length_ << endl;
123127
cerr << "Maximum intron length: " << max_intron_length_ << endl;
124128
cerr << "Require alignment flags: " << require_flags_ << endl;
@@ -140,6 +144,8 @@ int JunctionsExtractor::usage(ostream& out) {
140144
out << "Options:" << endl;
141145
out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
142146
<< "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
147+
out << "\t\t" << "-A INT\tMinimum read anchor length. Reads which satisfy a minimum \n"
148+
<< "\t\t\t " << "anchor length on both sides 'support' a junction. [5]" << endl;
143149
out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
144150
out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
145151
out << "\t\t" << "-f INT\tOnly use alignments where all flag bits set here are set. [0]" << endl;
@@ -171,12 +177,17 @@ string JunctionsExtractor::get_new_junction_name() {
171177
return name_ss.str();
172178
}
173179

174-
//Do some basic qc on the junction
180+
//Do some basic qc on the junction (and the read alignment)
175181
bool JunctionsExtractor::junction_qc(Junction &j1) {
182+
// don't add support for junction if read alignment doesn't meet criteria
176183
if(j1.end - j1.start < min_intron_length_ ||
177184
j1.end - j1.start > max_intron_length_) {
178185
return false;
179186
}
187+
if(j1.start - j1.thick_start < min_read_anchor_length_) return false;
188+
if(j1.thick_end - j1.end < min_read_anchor_length_) return false;
189+
190+
// add support, update if this junction is sufficiently anchored
180191
if(j1.start - j1.thick_start >= min_anchor_length_)
181192
j1.has_left_min_anchor = true;
182193
if(j1.thick_end - j1.end >= min_anchor_length_)

src/junctions/junctions_extractor.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,11 @@ class JunctionsExtractor {
153153
//Reference FASTA file
154154
string ref_;
155155
//Minimum anchor length for junctions
156-
//Junctions need atleast this many bp overlap
156+
//Junctions need at least this many bp overlap
157157
// on both ends.
158158
uint32_t min_anchor_length_;
159+
//Reads need at least this many bp overlap to support a junction
160+
uint32_t min_read_anchor_length_;
159161
//Minimum length of an intron, i.e min junction width
160162
uint32_t min_intron_length_;
161163
//Maximum length of an intron, i.e max junction width
@@ -190,6 +192,7 @@ class JunctionsExtractor {
190192
//Default constructor
191193
JunctionsExtractor() {
192194
min_anchor_length_ = 8;
195+
min_read_anchor_length_ = 5;
193196
min_intron_length_ = 70;
194197
max_intron_length_ = 500000;
195198
filter_flags_ = 0;
@@ -211,6 +214,7 @@ class JunctionsExtractor {
211214
int strandness1,
212215
string strand_tag1,
213216
uint32_t min_anchor_length1,
217+
uint32_t min_read_anchor_length1,
214218
uint32_t min_intron_length1,
215219
uint32_t max_intron_length1,
216220
uint16_t filter_flags,
@@ -221,6 +225,7 @@ class JunctionsExtractor {
221225
strandness_(strandness1),
222226
strand_tag_(strand_tag1),
223227
min_anchor_length_(min_anchor_length1),
228+
min_read_anchor_length_(min_read_anchor_length1),
224229
min_intron_length_(min_intron_length1),
225230
max_intron_length_(max_intron_length1),
226231
filter_flags_(filter_flags),

0 commit comments

Comments
 (0)