17
17
layout_output = sys .argv [6 ]
18
18
layscf_output = sys .argv [7 ]
19
19
20
+ min_contig_no_trim = 500000
20
21
min_read_len_fraction = 0.5
21
22
min_read_fromend_fraction = min_read_len_fraction / 1.5
22
23
min_exact_len_fraction = min_read_len_fraction / 3
@@ -243,7 +244,7 @@ def get_exact_match_length(clusters):
243
244
# Find all words that
244
245
# begin with [<>], contain anything but [
245
246
# begin with [N, contain digits and end with N] or N:optional-description]
246
- # we dump the description here and anly keep the N, digits N] part
247
+ # we dump the description here and only keep the N, digits N] part
247
248
#
248
249
fullname = lp [0 ]
249
250
pathfull = re .findall (r"([<>][^[]+|\[N\d+N(?:[^\]]+){0,1}\])" , lp [1 ])
@@ -530,6 +531,7 @@ def get_exact_match_length(clusters):
530
531
# contig actually has pieces, output the scaffold map. (The header line
531
532
# output from rukki looks like a contig with no pieces.)
532
533
#
534
+ no_trim = set ()
533
535
nameid = 1
534
536
for contig in sorted (contig_pieces .keys ()):
535
537
npieces = ngaps = nempty = 0
@@ -615,6 +617,7 @@ def get_exact_match_length(clusters):
615
617
print (f"path { outname } { contig } " , file = scf_layout_file )
616
618
617
619
for line in contig_pieces [contig ]:
620
+ if len (contig_pieces [contig ]) > 2 and (line == contig_pieces [contig ][0 ] or line == contig_pieces [contig ][- 2 ]): no_trim .add (line )
618
621
print (line , file = scf_layout_file )
619
622
620
623
nameid += 1
@@ -623,7 +626,6 @@ def get_exact_match_length(clusters):
623
626
624
627
del nameid
625
628
626
-
627
629
for contig in sorted (contig_actual_lines .keys ()):
628
630
if len (contig_actual_lines [contig ]) == 0 : continue
629
631
assert len (contig_actual_lines [contig ]) > 0
@@ -637,6 +639,10 @@ def get_exact_match_length(clusters):
637
639
end_pos = max (end_pos , line [2 ])
638
640
print (f"tig\t { contig } " , file = tig_layout_file )
639
641
print (f"len\t { end_pos - start_pos } " , file = tig_layout_file )
642
+ if end_pos - start_pos >= min_contig_no_trim or contig in no_trim :
643
+ print (f"trm\t 1" , file = tig_layout_file )
644
+ else :
645
+ print (f"trm\t 0" , file = tig_layout_file )
640
646
print (f"rds\t { len (contig_actual_lines [contig ])} " , file = tig_layout_file )
641
647
for line in contig_actual_lines [contig ]:
642
648
bgn = line [1 ] - start_pos
0 commit comments