Skip to content

Commit

Permalink
Revised the necessity to only remove duplicates with identical start
Browse files Browse the repository at this point in the history
coord.  It's not so vital with the revised formula, so within 10 works
good enough and prevents some more excessive depth raises.

Also (ifdef-ed out) implemented the "other" more complex method, for
comparison.  Cull whichever we wish before merging.
  • Loading branch information
jkbonfield committed Jun 16, 2020
1 parent 1881d9f commit dbbd34d
Showing 1 changed file with 37 additions and 7 deletions.
44 changes: 37 additions & 7 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ DEALINGS IN THE SOFTWARE. */
#include <assert.h>
#include <signal.h>
#include <inttypes.h>
#include <math.h>

// Suppress deprecation message for cigar_tab, which we initialise
#include "htslib/hts_defs.h"
Expand Down Expand Up @@ -4155,14 +4156,16 @@ struct __bam_plp_t {
void *data;
olap_hash_t *overlaps;

uint32_t depth[MAX_AHEAD];
uint64_t last_depth_pos;
double depth_pos_fract;

// For notification of creation and destruction events
// and associated client-owned pointer.
int (*plp_construct)(void *data, const bam1_t *b, bam_pileup_cd *cd);
int (*plp_destruct )(void *data, const bam1_t *b, bam_pileup_cd *cd);

// Field used for automatic depth reduction
uint32_t depth[MAX_AHEAD]; // circular depth buffer from b->core.pos on.
uint64_t last_depth_pos; // array filed out to this pos
uint64_t max_depth_pos; // >= max depth up to this pos
double depth_pos_fract; // (Simple method)
};

bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data)
Expand Down Expand Up @@ -4519,10 +4522,12 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b)
return 0;
} else if (iter->depth_pos_fract > 0
&& iter->tid == b->core.tid
&& iter->pos == b->core.pos) {
&& b->core.pos - iter->pos < 10) {
// Removal from depth somewhere else along read
endpos = bam_endpos(b);
uint32_t len = (uint32_t)(endpos - b->core.pos - 1);

#if 1
len = len * iter->depth_pos_fract + 0.4999;
if (b->core.pos + len < iter->last_depth_pos
&& iter->depth[(b->core.pos+len)%MAX_AHEAD] > iter->maxcnt) {
Expand All @@ -4532,6 +4537,28 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b)
overlap_remove(iter, b);
return 0;
}
#else
len = len * iter->depth_pos_fract + 0.4999;
if (len >= MAX_AHEAD) len = MAX_AHEAD-1;
int64_t pos_delta = (b->core.pos + len) - iter->max_depth_pos -1;
int last_depth = endpos > iter->last_depth_pos
? 0
: iter->depth[(iter->last_depth_pos-1)%MAX_AHEAD];
int pos_depth = iter->depth[b->core.pos % MAX_AHEAD];

#ifndef MIN
#define MIN(a,b) ((a)<(b)?(a):(b))
#endif

if (drand48() >
//(double)h / 0xffffffff >
MIN(1.0, pow((double)pos_delta / (len+.01), 2))
* MIN(1.0, pow((1-(last_depth+.1)/iter->maxcnt), 2))
* MIN(1.0, pow((iter->maxcnt+.1)/pos_depth, 2)) *2) {
overlap_remove(iter, b);
return 0;
}
#endif

// Zero newly observed iter depth elemtns.
uint64_t end_clipped = endpos, p;
Expand All @@ -4547,8 +4574,11 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b)
}

// Increment depth
for (p = b->core.pos; p < end_clipped; p++)
iter->depth[p % MAX_AHEAD]++;
for (p = b->core.pos; p < end_clipped; p++) {
if (++iter->depth[p % MAX_AHEAD] >= iter->maxcnt)
if (iter->max_depth_pos < p)
iter->max_depth_pos = p;
}
}
if (bam_copy1(&iter->tail->b, b) == NULL)
return -1;
Expand Down

0 comments on commit dbbd34d

Please sign in to comment.