Skip to content

Commit 0cc111a

Browse files
committed
Fixes bug when alignments are located at the initial positions in references without N padding (scaffolds, ...) (#12)
1 parent 925c5c0 commit 0cc111a

File tree

1 file changed

+8
-65
lines changed

1 file changed

+8
-65
lines changed

src/dna/sa_mapper_stage.c

+8-65
Original file line numberDiff line numberDiff line change
@@ -666,18 +666,7 @@ void check_pairs(array_list_t **cal_lists, sa_index3_t *sa_index,
666666

667667
list1 = cal_lists[i];
668668
list2 = cal_lists[i+1];
669-
/*
670-
////////////////////////
671-
printf(">>>> mate #1 (%i):\n", array_list_size(list1));
672-
for (int i = 0; i < array_list_size(list1); i++) {
673-
seed_cal_print(array_list_get(i, list1));
674-
}
675-
printf(">>>> mate #2 (%i):\n", array_list_size(list2));
676-
for (int i = 0; i < array_list_size(list2); i++) {
677-
seed_cal_print(array_list_get(i, list2));
678-
}
679-
////////////////////////
680-
*/
669+
681670
list1_size = array_list_size(list1);
682671
list2_size = array_list_size(list2);
683672

@@ -692,9 +681,6 @@ void check_pairs(array_list_t **cal_lists, sa_index3_t *sa_index,
692681
//if (i2 == 0 && cal2->mapq < 10) break;
693682

694683
list = search_mate_cal_by_prefixes(cal2, read1, sa_index, batch, cal_mng);
695-
//printf("--> search mates for mate #1:\n");
696-
//seed_cal_print(cal2);
697-
//printf("----> mates found %i\n", array_list_size(list));
698684
if (array_list_size(list) > 0) {
699685
for (int kk = 0; kk < array_list_size(list); kk++) {
700686
cal = array_list_get(kk, list);
@@ -1029,6 +1015,8 @@ int generate_cals_from_suffixes(int strand, fastq_read_t *read,
10291015

10301016
seed = seed_new(r_start_suf, r_end_suf, g_start_suf, g_end_suf);
10311017

1018+
// int check = 0;
1019+
10321020
// extend suffix to left side, if necessary
10331021
if (r_start_suf > 0) {
10341022
r_start = 0;
@@ -1039,6 +1027,11 @@ int generate_cals_from_suffixes(int strand, fastq_read_t *read,
10391027
g_end = g_start_suf - 1;
10401028
g_start = g_end - g_len;
10411029

1030+
if ((int) g_start < 0) {
1031+
g_start = 0;
1032+
g_len = g_start_suf;
1033+
}
1034+
10421035
#ifdef _TIMING
10431036
gettimeofday(&start, NULL);
10441037
#endif
@@ -1054,7 +1047,6 @@ int generate_cals_from_suffixes(int strand, fastq_read_t *read,
10541047
#endif
10551048
score = doscadfun_inv(r_seq, r_len, g_seq, g_len, MISMATCH_PERC,
10561049
&alig_out);
1057-
10581050
#ifdef _TIMING
10591051
gettimeofday(&stop, NULL);
10601052
mapping_batch->func_times[FUNC_MINI_SW_LEFT_SIDE] +=
@@ -1072,30 +1064,6 @@ int generate_cals_from_suffixes(int strand, fastq_read_t *read,
10721064
if ( ((int) seed->genome_start) < 0) seed->genome_start = 0;
10731065
}
10741066

1075-
// if there's a mini-gap then try to fill the mini-gap
1076-
if (seed->read_start > 0 && seed->read_start < 5) {
1077-
#ifdef _TIMING
1078-
gettimeofday(&start, NULL);
1079-
#endif
1080-
g_seq = &sa_index->genome->S[seed->genome_start + sa_index->genome->chrom_offsets[chrom] - seed->read_start];
1081-
for (size_t k1 = 0, k2 = 0; k1 < seed->read_start; k1++, k2++) {
1082-
if (r_seq[k1] != g_seq[k2]) {
1083-
seed->num_mismatches++;
1084-
cigar_append_op(1, 'X', &seed->cigar);
1085-
} else {
1086-
cigar_append_op(1, '=', &seed->cigar);
1087-
}
1088-
}
1089-
1090-
seed->genome_start -= seed->read_start;
1091-
seed->read_start = 0;
1092-
#ifdef _TIMING
1093-
gettimeofday(&stop, NULL);
1094-
mapping_batch->func_times[FUNC_SEED_NEW] +=
1095-
((stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) / 1000000.0f);
1096-
#endif
1097-
}
1098-
10991067
// update cigar with the sw output
11001068
if (score > 0.0f && alig_out.cigar.num_ops > 0) {
11011069
// if (alig_out.cigar.num_ops > 0) {
@@ -1149,31 +1117,6 @@ int generate_cals_from_suffixes(int strand, fastq_read_t *read,
11491117
cigar_concat(&alig_out.cigar, &seed->cigar);
11501118
}
11511119
}
1152-
1153-
// if there's a mini-gap then try to fill the mini-gap
1154-
diff = read->length - seed->read_end - 1;
1155-
if (diff > 0 && diff < 5) {
1156-
#ifdef _TIMING
1157-
gettimeofday(&start, NULL);
1158-
#endif
1159-
g_seq = &sa_index->genome->S[seed->genome_end + sa_index->genome->chrom_offsets[chrom] + 1];
1160-
for (size_t k1 = seed->read_end + 1, k2 = 0; k1 < read->length; k1++, k2++) {
1161-
if (r_seq[k1] != g_seq[k2]) {
1162-
seed->num_mismatches++;
1163-
cigar_append_op(1, 'X', &seed->cigar);
1164-
} else {
1165-
cigar_append_op(1, '=', &seed->cigar);
1166-
}
1167-
}
1168-
1169-
seed->read_end += diff;
1170-
seed->genome_end += diff;
1171-
#ifdef _TIMING
1172-
gettimeofday(&stop, NULL);
1173-
mapping_batch->func_times[FUNC_SEED_NEW] +=
1174-
((stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) / 1000000.0f);
1175-
#endif
1176-
}
11771120
}
11781121

11791122
// update CAL manager with this seed

0 commit comments

Comments
 (0)