Skip to content

Commit

Permalink
Add -minlength option to discard short overlaps.
Browse files Browse the repository at this point in the history
  • Loading branch information
brianwalenz committed Sep 18, 2020
1 parent f978ef6 commit d6b7a1f
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions src/mhap/mhapConvert.C
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ int
main(int argc, char **argv) {
char *outName = NULL;
char *seqName = NULL;
int32 minLength = 0;

vector<char *> files;

Expand All @@ -41,6 +42,9 @@ main(int argc, char **argv) {
} else if (strcmp(argv[arg], "-S") == 0) {
seqName = argv[++arg];

} else if (strcmp(argv[arg], "-minlength") == 0) {
minLength = strtoint32(argv[++arg]);

} else if (fileExists(argv[arg])) {
files.push_back(argv[arg]);

Expand All @@ -55,6 +59,7 @@ main(int argc, char **argv) {
if ((err) || (seqName == NULL) || (outName == NULL) || (files.size() == 0)) {
fprintf(stderr, "usage: %s -S seqStore -o output.ovb input.mhap[.gz]\n", argv[0]);
fprintf(stderr, " Converts mhap native output to ovb\n");
fprintf(stderr, " -minlength X discards overlaps below X bp long.\n");

if (seqName == NULL)
fprintf(stderr, "ERROR: no seqStore (-S) supplied\n");
Expand Down Expand Up @@ -147,9 +152,16 @@ main(int argc, char **argv) {
ov.dat.ovl.bhg5, ov.dat.ovl.bhg3,
(ov.dat.ovl.flipped) ? " flipped" : ""), exit(1);

// Overlap looks good, write it!
// Overlap looks good, write it if it's long enough. Bogart is
// computing overlap length as the max number of bases covered on
// either read.

int32 oalen = alen - ov.dat.ovl.ahg5 - ov.dat.ovl.ahg3;
int32 oblen = blen - ov.dat.ovl.bhg5 - ov.dat.ovl.bhg3;

of->writeOverlap(&ov);
if ((minLength <= oalen) ||
(minLength <= oblen))
of->writeOverlap(&ov);
}

delete in;
Expand Down

0 comments on commit d6b7a1f

Please sign in to comment.