Skip to content

Commit 55e9c6e

Browse files
fix for non-scaffolding in alternative direction
1 parent cf66459 commit 55e9c6e

File tree

2 files changed

+36
-4
lines changed

2 files changed

+36
-4
lines changed

src/Snakefiles/8-hicPipeline.sm

+1
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ rule HiC_rdnascaff:
589589
unitigs_telo = '8-hicPipeline/unitigs.telo',
590590
unitigs_nohpc50 = '8-hicPipeline/unitigs_nonhpc50.mashmap',
591591
path_mashmap = rules.prepareScaffolding.output.path_mashmap,
592+
contigs = rules.copyFiles.output.unitig_fasta,
592593
prefiltered_aln = rules.scaffoldMergeBWA.output.alignments if config['withPOREC'] == "False" else rules.transformBWA.output.byread_mapping
593594
output:
594595
scaff_tsv_path = '8-hicPipeline/scaff_rukki.paths.tsv',

src/scripts/scaffolding/scaffold_graph.py

+35-4
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ def getScores(self, or_path_id, path_storage, type):
438438
continue
439439
next_nor_path = gf.nor_path_id(next_or_path)
440440
#TODO: optimize homology counting, once per all paths
441-
if next_nor_path in homologous_paths:
441+
if next_nor_path in homologous_paths or next_nor_path == nor_path_id:
442442
continue
443443
else:
444444
if not precounted:
@@ -562,6 +562,8 @@ def generateScaffolds(self):
562562
cur_scaffold = [or_from_path_id]
563563
cur_path_id = or_from_path_id
564564
nor_used_path_ids.add(or_from_path_id.strip('-+'))
565+
#lets add bidirectional expansion
566+
tried_reverse = False
565567
while True:
566568
next_path_id = self.findNextPath(cur_path_id, nor_used_path_ids, "weight")
567569
if next_path_id == "NONE":
@@ -570,10 +572,29 @@ def generateScaffolds(self):
570572

571573
if next_path_id == "DONE":
572574
self.logger.info ("All done, stopped at telomere")
573-
break
575+
if tried_reverse:
576+
break
577+
else:
578+
self.logger.info (f"Reversing {cur_scaffold}")
579+
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
580+
cur_scaffold.reverse()
581+
self.logger.info (f"Reversed {cur_scaffold}")
582+
cur_path_id = cur_scaffold[-1]
583+
tried_reverse = True
584+
continue
574585
elif next_path_id == "NONE":
575586
self.logger.info ("Failed to find extension, stopping")
576-
break
587+
if tried_reverse:
588+
break
589+
else:
590+
self.logger.info (f"Reversing {cur_scaffold}")
591+
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
592+
cur_scaffold.reverse()
593+
self.logger.info (f"Reversed {cur_scaffold}")
594+
cur_path_id = cur_scaffold[-1]
595+
tried_reverse = True
596+
continue
597+
577598
else:
578599
hom_before = False
579600
nor_new_path_id = gf.nor_path_id(next_path_id)
@@ -587,13 +608,23 @@ def generateScaffolds(self):
587608
hom_before = True
588609
if hom_before:
589610
self.logger.info (f"Homologous paths before in scaffold, not extending {cur_path_id} with {next_path_id}")
590-
break
611+
if tried_reverse:
612+
break
613+
else:
614+
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
615+
cur_scaffold.reverse()
616+
cur_path_id = cur_scaffold[-1]
617+
tried_reverse = True
618+
continue
591619
self.logger.info (f"Extending {cur_path_id} with {next_path_id}")
592620

593621
#possibly not do so with paths of length one? They can be more successful in other direction
594622
cur_scaffold.append(next_path_id)
595623
nor_used_path_ids.add(next_path_id.strip('-+'))
596624
cur_path_id = next_path_id
625+
#Reversing for comparison with previous runs only
626+
cur_scaffold = [gf.rc_path_id(x) for x in cur_scaffold]
627+
cur_scaffold.reverse()
597628
self.logger.info(f"scaffold {cur_scaffold}\n")
598629
if len(cur_scaffold) > 1:
599630
res.append(cur_scaffold)

0 commit comments

Comments
 (0)