Skip to content

Commit b47102c

Browse files
Fix for empty pieces in scfmap, now gluing small and intentionally crashing for large, #236
1 parent 8f3d550 commit b47102c

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

src/scripts/get_layout_from_mbg.py

+34
Original file line numberDiff line numberDiff line change
@@ -543,9 +543,43 @@ def get_exact_match_length(clusters):
543543
npieces += 1 # Piece with reads assigned.
544544
elif line != "end":
545545
nempty += 1 # Piece with no reads assigned.
546+
547+
if contig_lens[line] > 100000:
548+
sys.stderr.write(f"ERROR: empty entry for piece {line} in scaffold {contig} of LARGE expected length {contig_lens[line]}\n")
549+
exit(1)
546550
contig_pieces[contig][i] = "[N%dN]"%(contig_lens[line])
547551
if npieces > 0:
548552
print(f"Warning: empty entry for piece {line} in scaffold {contig} of expected length {contig_lens[line]}, using Ns {contig_pieces[contig][i]} instead", file=nul_layout_file)
553+
#postprocessing to avoid consecutive empty pieces
554+
i = 0
555+
new_pieces = []
556+
previous = "NONE"
557+
while i < len(contig_pieces[contig]):
558+
line = contig_pieces[contig][i]
559+
i += 1
560+
is_gap = re.match(r"\[N\d+N\]", line)
561+
if is_gap:
562+
#just ignoring leading gaps
563+
if previous == "NONE":
564+
continue
565+
elif previous == "piece":
566+
new_pieces.append(line)
567+
previous = "gap"
568+
else:
569+
#merging consecutive gaps
570+
previous = "gap"
571+
last = new_pieces.pop()
572+
last_int = int(last[2:-2])
573+
cur_int = int(line[2:-2])
574+
new_pieces.append("[N%dN]"%(last_int + cur_int))
575+
else:
576+
new_pieces.append(line)
577+
# it can actually be 'end' but who cares then
578+
previous = "piece"
579+
#removing trailing gaps
580+
while len(new_pieces) > 0 and re.match(r"\[N\d+N\]", new_pieces[-1]):
581+
new_pieces.pop()
582+
contig_pieces[contig] = new_pieces
549583

550584
if npieces > 0:
551585
if nempty > 0:

0 commit comments

Comments
 (0)