Skip to content

Commit

Permalink
Torrent Suite Software 5.10.1 Release
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-pan-tfs committed Oct 19, 2018
1 parent 4187790 commit d4a04a9
Show file tree
Hide file tree
Showing 37 changed files with 302 additions and 92 deletions.
2 changes: 2 additions & 0 deletions Analysis/VariantCaller/BAMWalkerEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct Alignment {
next = NULL;
read_number = 0;
original_position = 0;
original_end_position = 0;
processed = false;
processing_prev = NULL;
processing_next = NULL;
Expand Down Expand Up @@ -101,6 +102,7 @@ struct Alignment {
Alignment* next; //! Singly-linked list for durable alignments iterator
int read_number; //! Sequential number of this read
int original_position; //! Alignment position, before primer trimming
int original_end_position; //! Alignment end position, before primer trimming

// Processing state
bool processed; //! Is candidate generator's pre-processing finished?
Expand Down
35 changes: 25 additions & 10 deletions Analysis/VariantCaller/EnsembleEval/StackEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1921,7 +1921,8 @@ void EnsembleEval::LookAheadSlidingWindow(int current_candidate_gen_window_end_0
vector<int>& alleles_on_hold,
int& sliding_window_start_0,
int& sliding_window_end_0,
int max_group_size_allowed){
int max_group_size_allowed,
const TargetsManager * const targets_manager){
const int num_alt_alleles = (int) allele_identity_vector.size();
int splicing_lower_bound_at_current_candidate_gen_window_end_0 = -1;
LocalReferenceContext current_candidate_gen_window_context;
Expand All @@ -1936,8 +1937,12 @@ void EnsembleEval::LookAheadSlidingWindow(int current_candidate_gen_window_end_0
// I look ahead just 1bp every time.
sliding_window_end_0 = min(current_candidate_gen_window_end_0 + 1, (int) ref_reader.chr_size(seq_context.chr_idx));

// (Step 1.b): The trivial (and perhaps the most common) case:
// If every allele and its end of the variant window hits the lookahead window end, then every allele is on hold.
// (Step 1.b): The trivial (and perhaps the most common) cases:
// (1.b.1) Every allele is ready to go if the look ahead window can't be fully covered by any unmerged region.
int current_merged_target_idx = targets_manager->FindMergedTargetIndex(seq_context.chr_idx, seq_context.position0);
bool is_breaking_point = (current_merged_target_idx >= 0)? targets_manager->IsBreakingIntervalInMerged(current_merged_target_idx, seq_context.chr_idx, seq_context.position0, max(seq_context.position0, (long) sliding_window_end_0)) : false;

// (1.b.2) If every allele and its end of the variant window hits the lookahead window end, then every allele is on hold.
bool is_trivial_all_on_hold = true;
for (int i_alt = 0; i_alt < num_alt_alleles; ++i_alt){
if (allele_identity_vector[i_alt].end_variant_window < current_candidate_gen_window_end_0){
Expand All @@ -1946,7 +1951,7 @@ void EnsembleEval::LookAheadSlidingWindow(int current_candidate_gen_window_end_0
}
}

if (is_trivial_all_on_hold){
if (is_trivial_all_on_hold and (not is_breaking_point)){
for (int i_alt = 0; i_alt < num_alt_alleles; ++i_alt){
alleles_on_hold.push_back(i_alt);
// Sort alleles_on_hold
Expand All @@ -1973,14 +1978,18 @@ void EnsembleEval::LookAheadSlidingWindow(int current_candidate_gen_window_end_0
// I don't do final splitting.
SplitAlleleIdentityVector(padding_removed_allele_identity_vector, allele_groups_ready_to_go, ref_reader, num_alt_alleles, true, 0);

// (Step 2.b): Trivial case: No need to look ahead => all alleles are ready to go.
if (current_candidate_gen_window_end_0 == sliding_window_end_0 or sliding_window_end_0 == (int) ref_reader.chr_size(seq_context.chr_idx)){
// (Step 2.b): Trivial cases: No need to look ahead => all alleles are ready to go.
// Case 1: hit the end of the chromosome
// Case 2: breaking point in the merged region
if (current_candidate_gen_window_end_0 == sliding_window_end_0
or sliding_window_end_0 == (int) ref_reader.chr_size(seq_context.chr_idx)
or is_breaking_point){
sliding_window_start_0 = sliding_window_end_0;
if (DEBUG){
cout << "+ Calculating the look ahead \"sliding\" window for (" << PrintVariant(*variant) <<"): "<< endl
<< " - Current candidate window end = " << current_candidate_gen_window_end_0 << endl
<< " - Current variant window = [" << seq_context.position0 << ", " << (int) seq_context.position0 + (int) seq_context.reference_allele.size() << ")" << endl
<< " - All alleles are ready to go and no need to look ahead. " << endl;
<< " - All alleles are ready to go and no need to look ahead " << (is_breaking_point? "because it hits a breaking point in the merged region)." : ".") << endl;
}
FinalSplitReadyToGoAlleles(allele_groups_ready_to_go, ref_reader, max_group_size_allowed);
return;
Expand Down Expand Up @@ -2269,14 +2278,16 @@ void EnsembleEval::StackUpOneVariant(const ExtendParameters &parameters, const P
if (rai->sample_index != sample_index)
continue;

if (rai->alignment.Position > multiallele_window_start)
// TS-17069: The primer-trimmed read must fully cover the variant
if (rai->alignment.Position > seq_context.position0 or rai->alignment.GetEndPosition() < (int) seq_context.position0 + (int) seq_context.reference_allele.size())
continue;

if (rai->filtered)
continue;

if (rai->alignment.GetEndPosition() < multiallele_window_end)
continue;
// TS-17069: The original read must fully cover the splicing window. (rai->original_positinon has been checked)
if (rai->original_end_position < multiallele_window_end)
continue;

// Reservoir Sampling
if (read_stack.size() < (unsigned int)parameters.my_controls.downSampleCoverage) {
Expand Down Expand Up @@ -2871,6 +2882,10 @@ void EnsembleEval::StackUpOneVariantMolTag(const ExtendParameters &parameters, v
continue;
}

// Notes for TS-17069:
// However, TS-17069 doesn't affect the UMT runs in 5.10 since trim-ampliseq-primers is turned off.
// So the logic for the fix of TS-17069 is not applied in TS 5.10.1 for UMT (though I should, and also handle the consensus alignment)

// Check global conditions to stop reading in more alignments
if ((*member_it)->original_position > multiallele_window_start
or (*member_it)->alignment.Position > multiallele_window_start
Expand Down
2 changes: 1 addition & 1 deletion Analysis/VariantCaller/EnsembleEval/StackEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ class EnsembleEval {
//! @brief Split the current variant into as many callable smaller variants as possible (primarily for candidate generator))
void SplitMyAlleleIdentityVector(list<list<int> >& allele_group, const ReferenceReader &ref_reader, int max_group_size_allowed);
//! @brief The "one-window" approach
void LookAheadSlidingWindow(int current_candidate_gen_window_end_0, const ReferenceReader &ref_reader, list<list<int> >& allele_groups_ready_to_go, vector<int>& alleles_on_hold, int& sliding_window_start_0, int& sliding_window_end_0, int max_group_size_allowed);
void LookAheadSlidingWindow(int current_candidate_gen_window_end_0, const ReferenceReader &ref_reader, list<list<int> >& allele_groups_ready_to_go, vector<int>& alleles_on_hold, int& sliding_window_start_0, int& sliding_window_end_0, int max_group_size_allowed, const TargetsManager * const targets_manager);
void FinalSplitReadyToGoAlleles(list<list<int> >& allele_groups_ready_to_go, const ReferenceReader &ref_reader, int max_group_size_allowed);

//------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions Analysis/VariantCaller/HandleVariant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,12 +405,12 @@ void CandidateExaminer::SplitCandidateVariant(list<list<int> >& allele_groups){
// !!! Important !!! If sliding_window_start_0 == sliding_window_end_0 == current_look_ahead_window_end_0, then all alleles are ready to go. No need to look ahead.
// !!! Important !!! It shall guarantee both conditions as follows: a) All on-hold alleles will be generated in the new sliding window. b) No ready-to-go alleles will be generated in the new sliding window.
void CandidateExaminer::LookAheadSlidingWindow0(list<list<int> >& allele_groups_ready_to_go, vector<int>& alleles_on_hold, int& sliding_window_start_0, int& sliding_window_end_0, int current_candidate_gen_window_end_0){
my_ensemble_->LookAheadSlidingWindow(current_candidate_gen_window_end_0, *(vc_->ref_reader), allele_groups_ready_to_go, alleles_on_hold, sliding_window_start_0, sliding_window_end_0, max_group_size_allowed_);
my_ensemble_->LookAheadSlidingWindow(current_candidate_gen_window_end_0, *(vc_->ref_reader), allele_groups_ready_to_go, alleles_on_hold, sliding_window_start_0, sliding_window_end_0, max_group_size_allowed_, vc_->targets_manager);
}

// The 1-based coordinate version of LookAheadSlidingWindow0.
void CandidateExaminer::LookAheadSlidingWindow1(list<list<int> >& allele_groups_ready_to_go, vector<int>& alleles_on_hold, int& sliding_window_start_1, int& sliding_window_end_1, int current_candidate_gen_window_end_1){
my_ensemble_->LookAheadSlidingWindow(--current_candidate_gen_window_end_1, *(vc_->ref_reader), allele_groups_ready_to_go, alleles_on_hold, sliding_window_start_1, sliding_window_end_1, max_group_size_allowed_);
my_ensemble_->LookAheadSlidingWindow(--current_candidate_gen_window_end_1, *(vc_->ref_reader), allele_groups_ready_to_go, alleles_on_hold, sliding_window_start_1, sliding_window_end_1, max_group_size_allowed_,vc_->targets_manager);
++sliding_window_start_1;
++sliding_window_end_1;
}
Expand Down
7 changes: 6 additions & 1 deletion Analysis/VariantCaller/HypothesisEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,12 @@ void CalculateHypPredictions(
}
i_base++;
}

// TS-17279
if (i_flow < flow_upper_bound){
if (flow_order.nuc_at(i_flow) == Hypotheses[i_hyp].back()){
++i_flow;
}
}
// Find last main incorporating flow of all hypotheses
max_last_flow = max(max_last_flow, i_flow);
min_last_flow = min(min_last_flow, i_flow);
Expand Down
1 change: 1 addition & 0 deletions Analysis/VariantCaller/MolecularTag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ void ConsensusAlignmentManager::PartiallyCopyAlignmentFromAnother_(Alignment& al
alignment.alignment = template_alignment.alignment;
// Copy all the member variables determined in AlleleParser::UnpackReadAllele
alignment.original_position = template_alignment.original_position;
alignment.original_end_position = template_alignment.original_end_position;
alignment.start = template_alignment.start;
alignment.end = template_alignment.end;
alignment.snp_count = template_alignment.snp_count;
Expand Down
6 changes: 4 additions & 2 deletions Analysis/VariantCaller/Reads/ExtendedReadInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@ void GetPrefixFlow(Alignment *rai, const string & prefix_bases, const ion::FlowO
// Sets the member variables ref_aln, seq_aln, pretty_aln, startSC, endSC
void UnpackAlignmentInfo(Alignment *rai)
{
// TS-17069 Use the original CIGAR for splicing
const vector<CigarOp>& cigar_for_unpack = rai->old_cigar;
rai->left_sc = 0;
rai->right_sc = 0;

unsigned int num_query_bases = 0;
bool match_found = false;

for (vector<CigarOp>::const_iterator cigar = rai->alignment.CigarData.begin(); cigar != rai->alignment.CigarData.end(); ++cigar) {
for (vector<CigarOp>::const_iterator cigar = cigar_for_unpack.begin(); cigar != cigar_for_unpack.end(); ++cigar) {
switch (cigar->Type) {
case 'M':
case '=':
Expand Down Expand Up @@ -96,7 +98,7 @@ void UnpackAlignmentInfo(Alignment *rai)
// Basic alignment sanity check
if (num_query_bases != rai->alignment.QueryBases.length()) {
cerr << "WARNING in ExtendedReadInfo::UnpackAlignmentInfo: Invalid Cigar String in Read " << rai->alignment.Name << " Cigar: ";
for (vector<CigarOp>::const_iterator cigar = rai->alignment.CigarData.begin(); cigar != rai->alignment.CigarData.end(); ++cigar)
for (vector<CigarOp>::const_iterator cigar = cigar_for_unpack.begin(); cigar != cigar_for_unpack.end(); ++cigar)
cerr << cigar->Length << cigar->Type;
cerr << " Length of query string: " << rai->alignment.QueryBases.length() << endl;
assert(num_query_bases == rai->alignment.QueryBases.length());
Expand Down
6 changes: 4 additions & 2 deletions Analysis/VariantCaller/Splice/SpliceVariantHypotheses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ bool SpliceVariantHypotheses(const Alignment &current_read, const EnsembleEval &
}

int read_idx = current_read.left_sc;
int ref_idx = current_read.alignment.Position;
// TS-17069 Use the original position for splicing
int ref_idx = current_read.original_position;
int read_idx_max = current_read.alignment.QueryBases.length() - current_read.right_sc;
bool did_splicing = false;
bool just_did_splicing = false;
Expand Down Expand Up @@ -411,7 +412,8 @@ string SpliceDoRealignement (PersistingThreadObjects &thread_objects, const Alig

// --- Get index positions at snp variant position
int read_idx = current_read.left_sc;
int ref_idx = current_read.alignment.Position;
// TS-17069 Use the original position for splicing
int ref_idx = current_read.original_position;
unsigned int pretty_idx = 0;

while (pretty_idx < current_read.pretty_aln.length() and ref_idx < variant_position) {
Expand Down
91 changes: 90 additions & 1 deletion Analysis/VariantCaller/TargetsManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ bool CompareTargets(TargetsManager::UnmergedTarget *i, TargetsManager::UnmergedT
void TargetsManager::Initialize(const ReferenceReader& ref_reader, const string& _targets, float min_cov_frac, bool _trim_ampliseq_primers /*const ExtendParameters& parameters*/)
{
min_coverage_fraction = min_cov_frac;
chr_to_merged_idx.assign(ref_reader.chr_count(), -1);
//
// Step 1. Retrieve raw target definitions
//
Expand Down Expand Up @@ -114,8 +115,11 @@ void TargetsManager::Initialize(const ReferenceReader& ref_reader, const string&
merged.back().begin = unmerged[idx].begin;
merged.back().end = unmerged[idx].end;
merged.back().first_unmerged = idx;
if (chr_to_merged_idx[unmerged[idx].chr] < 0){
chr_to_merged_idx[unmerged[idx].chr] = (int) merged.size() - 1;
}
}
unmerged[idx].merged = merged.size();
unmerged[idx].merged = (int) merged.size() - 1;
}

if (_targets.empty()) {
Expand Down Expand Up @@ -595,3 +599,88 @@ void TargetsManager::AddCoverageToRegions(const map<int, TargetStat>& stat_of_ta
}
pthread_mutex_unlock(&coverage_counter_mutex_);
}

bool TargetsManager::IsCoveredByMerged(int merged_idx, int chr, long pos) const{
// Skip the check of chromosone if chr < 0.
if (chr >= 0 and merged[merged_idx].chr != chr){
return false;
}

// Note that the regions are left-close and right-open.
return (pos >= merged[merged_idx].begin) and (pos < merged[merged_idx].end);

}

// Is the (0-based) interval [pos_start, pos_end) fully covered by merged[merged_idx]?
bool TargetsManager::IsFullyCoveredByMerged(int merged_idx, int chr, long pos_start, long pos_end) const{
// Skip the check of chromosone if chr < 0.
if (chr >= 0 and merged[merged_idx].chr != chr){
return false;
}
assert(pos_start <= pos_end);
// Note that the regions are left-close and right-open.
return (pos_start >= merged[merged_idx].begin) and (pos_end < merged[merged_idx].end);

}

// Definition [Breaking Interval]
// An interval [pos_start, pos_end) is a breaking interval of the merged target if there is NO unmerged target (in the merged target) that fully cover [pos_start, pos_end).
bool TargetsManager::IsBreakingIntervalInMerged(int merged_idx, int chr, long pos_start, long pos_end) const{
assert(pos_start <= pos_end);
// Note that chr < 1 skips the check of chromosome.
if (not IsFullyCoveredByMerged(merged_idx, chr, pos_start, pos_end)){
// By definition, a position that is not in the merged target is not its breaking point.
return false;
}
for (int unmerged_idx = merged[merged_idx].first_unmerged; unmerged_idx < (int) unmerged.size(); ++unmerged_idx){
if (unmerged[unmerged_idx].merged != merged_idx){
break;
}
if (pos_start >= unmerged[unmerged_idx].begin and pos_end <= unmerged[unmerged_idx].end){
// Not a breaking point if I find a unmerged region that covers [pos_start, pos_end)
return false;
}
}
// No such unmerged region is found. Must be a breaking point.
return true;
}

int TargetsManager::FindMergedTargetIndex(int chr, long pos) const{
// merged_idx_start is the first index of merged of the chromosome
int merged_idx_start = chr_to_merged_idx[chr];
// merged_idx_end is the last index of merged of the chromosome
int merged_idx_end = (int) merged.size() - 1;
// return -1 if no region of the chromosome
if (merged_idx_start < 0){
return -1;
}
if (IsCoveredByMerged(merged_idx_start, chr, pos)){
return merged_idx_start;
}

// Search for merged_idx_end
for (int chr_idx = chr + 1; chr_idx < (int) chr_to_merged_idx.size(); ++chr_idx){
if (chr_to_merged_idx[chr_idx] > 0){
merged_idx_end = chr_to_merged_idx[chr_idx] - 1;
break;
}
}
if (IsCoveredByMerged(merged_idx_end, chr, pos)){
return merged_idx_end;
}

// Binary search
int probe_idx = merged_idx_start + (merged_idx_end - merged_idx_start) / 2;
while (probe_idx != merged_idx_start and probe_idx != merged_idx_end){
if (IsCoveredByMerged(probe_idx, chr, pos)){
return probe_idx;
}
if (merged[probe_idx].begin > pos){
merged_idx_end = probe_idx;
}else{
merged_idx_start = probe_idx;
}
probe_idx = merged_idx_start + (merged_idx_end - merged_idx_start) / 2;
}
return -1;
}
5 changes: 5 additions & 0 deletions Analysis/VariantCaller/TargetsManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,14 @@ class TargetsManager {
void AddCoverageToRegions(const map<int, TargetStat>& stat_of_targets);
void WriteTargetsCoverage(const string& file_path, const ReferenceReader& ref_reader, bool use_best_target, bool use_mol_tags) const;
int ReportHotspotsOnly(const MergedTarget &merged, int chr, long pos);
bool IsCoveredByMerged(int merged_idx, int chr, long pos) const;
bool IsFullyCoveredByMerged(int merged_idx, int chr, long pos_start, long pos_end) const;
bool IsBreakingIntervalInMerged(int merged_idx, int chr, long pos_start, long pos_end) const;
int FindMergedTargetIndex(int chr, long pos) const;

vector<UnmergedTarget> unmerged;
vector<MergedTarget> merged;
vector<int> chr_to_merged_idx;
bool trim_ampliseq_primers;

// The following variables are just for bool FilterReadByRegion(Alignment* rai, int recent_target) use only.
Expand Down
2 changes: 1 addition & 1 deletion Analysis/VariantCaller/tvcutils/prepare_hotspots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ class junction {
if (junc_.size() == 0) return true;
if (id < 0) return false;
if (id >= (int) junc_.size()) return false;
return junc_[id].contained_in_ampl(pos, pos+len-1);
return junc_[id].contained_in_ampl(pos, pos+len);
}
void add(int id, int beg, int end) {
if (id >= (int) junc_.size()) return;
Expand Down
14 changes: 13 additions & 1 deletion Analysis/VariantCaller/tvcutils/unify_vcf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,12 @@ int VcfOrderedMerger::variant_cmp_secplus(const T* v1, const vcf::Variant* v2) c
return compare(idx_v1, v1->position, idx_v2, (long) (v2->position+50+v2->ref.length()));
}

template <typename T>
int VcfOrderedMerger::variant_cmp_secplus_more(const T* v1, const vcf::Variant* v2) const {
int idx_v1 = reference_reader.chr_idx(v1->sequenceName.c_str());
int idx_v2 = reference_reader.chr_idx(v2->sequenceName.c_str());
return compare(idx_v1, v1->position, idx_v2, (long) (v2->position+250));
}


// -----------------------------------------------------------------------------------
Expand All @@ -382,6 +388,12 @@ bool VcfOrderedMerger::too_far(vcf::Variant* v1, vcf::Variant* v2) {
*/
}

bool VcfOrderedMerger::too_far_far(vcf::Variant* v1, vcf::Variant* v2) {
if (v2 == NULL) return true;
int com = variant_cmp_secplus_more(v2, v1);
return (com == -1);
}

// -----------------------------------------------------------------------------------
// position in vcf is 1-based
// targets in bed format are 0-based open ended intervals
Expand Down Expand Up @@ -1129,7 +1141,7 @@ void VcfOrderedMerger::flush_vcf(vcf::Variant* latest)
while (not variant_list.empty()) {
vcf::Variant* current = &(*variant_list.begin());

if (too_far(current, latest)) {
if (too_far_far(current, latest)) {
if (is_within_target_region(current)) {
unsigned int i;
bool has_oid = false;
Expand Down
4 changes: 4 additions & 0 deletions Analysis/VariantCaller/tvcutils/unify_vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ class VcfOrderedMerger {
template <typename T>
int variant_cmp_secplus(const T* v1, const vcf::Variant* v2) const;

template <typename T>
int variant_cmp_secplus_more(const T* v1, const vcf::Variant* v2) const;

template <typename T>
bool is_within_target_region(T *variant);

Expand Down Expand Up @@ -246,6 +249,7 @@ class VcfOrderedMerger {

void span_ref_and_alts();
bool too_far(vcf::Variant*, vcf::Variant*);
bool too_far_far(vcf::Variant*, vcf::Variant*);
bool find_and_merge_assembly();

void generate_novel_annotations(vcf::Variant* variant);
Expand Down
2 changes: 1 addition & 1 deletion Analysis/version
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Version for target of current development cycle
MAJOR=5
MINOR=10
RELEASE=10
RELEASE=11
2 changes: 1 addition & 1 deletion TSVersion
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Version for target of current development cycle
TS_MAJOR=5
TS_MINOR=10
TS_RELEASE=0
TS_RELEASE=1
Loading

0 comments on commit d4a04a9

Please sign in to comment.