Skip to content

Commit 40388bc

Browse files
committed
update canu tag and use per-contig trim setting to avoid trimming telomere while still trimming internal nodes to supported region
1 parent 33c2079 commit 40388bc

File tree

3 files changed

+10
-4
lines changed

3 files changed

+10
-4
lines changed

src/Snakefiles/7-generateConsensus.sm

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ fi
6161
-threads {threads} \\\\
6262
-import ../{input.package} \\\\
6363
-A ../{output.consensus}.WORKING \\\\
64-
-C 1 \\\\
64+
-C 2 \\\\
6565
\$align \\\\
6666
-maxcoverage 50 \\\\
6767
-e 0.05 \\\\

src/scripts/get_layout_from_mbg.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
layout_output = sys.argv[6]
1818
layscf_output = sys.argv[7]
1919

20+
min_contig_no_trim = 500000
2021
min_read_len_fraction = 0.5
2122
min_read_fromend_fraction = min_read_len_fraction/1.5
2223
min_exact_len_fraction = min_read_len_fraction/3
@@ -243,7 +244,7 @@ def get_exact_match_length(clusters):
243244
# Find all words that
244245
# begin with [<>], contain anything but [
245246
# begin with [N, contain digits and end with N] or N:optional-description]
246-
# we dump the description here and anly keep the N, digits N] part
247+
# we dump the description here and anly keep the N, digits N] part
247248
#
248249
fullname = lp[0]
249250
pathfull = re.findall(r"([<>][^[]+|\[N\d+N(?:[^\]]+){0,1}\])", lp[1])
@@ -530,6 +531,7 @@ def get_exact_match_length(clusters):
530531
# contig actually has pieces, output the scaffold map. (The header line
531532
# output from rukki looks like a contig with no pieces.)
532533
#
534+
no_trim = set()
533535
nameid = 1
534536
for contig in sorted(contig_pieces.keys()):
535537
npieces = ngaps = nempty = 0
@@ -615,6 +617,7 @@ def get_exact_match_length(clusters):
615617
print(f"path {outname} {contig}", file=scf_layout_file)
616618

617619
for line in contig_pieces[contig]:
620+
if len(contig_pieces[contig]) > 2 and (line == contig_pieces[contig][0] or line == contig_pieces[contig][-2]): no_trim.add(line)
618621
print(line, file=scf_layout_file)
619622

620623
nameid += 1
@@ -623,7 +626,6 @@ def get_exact_match_length(clusters):
623626

624627
del nameid
625628

626-
627629
for contig in sorted(contig_actual_lines.keys()):
628630
if len(contig_actual_lines[contig]) == 0: continue
629631
assert len(contig_actual_lines[contig]) > 0
@@ -637,6 +639,10 @@ def get_exact_match_length(clusters):
637639
end_pos = max(end_pos, line[2])
638640
print(f"tig\t{contig}", file=tig_layout_file)
639641
print(f"len\t{end_pos - start_pos}", file=tig_layout_file)
642+
if end_pos-start_pos >= min_contig_no_trim or contig in no_trim:
643+
print(f"trm\t1", file=tig_layout_file)
644+
else:
645+
print(f"trm\t0", file=tig_layout_file)
640646
print(f"rds\t{len(contig_actual_lines[contig])}", file=tig_layout_file)
641647
for line in contig_actual_lines[contig]:
642648
bgn = line[1] - start_pos

0 commit comments

Comments
 (0)