Skip to content

Commit

Permalink
Fixes #293 and integrates pyrodigal v3 and pyrodigal-gv releases, add…
Browse files Browse the repository at this point in the history
…s some tests
  • Loading branch information
gbouras13 committed Sep 18, 2023
1 parent e71c1ec commit 75c07ae
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 7 deletions.
26 changes: 23 additions & 3 deletions bin/input_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,33 @@ def validate_fasta(filename):
else:
with open(filename, "r") as handle:
fasta = SeqIO.parse(handle, "fasta")
logger.info("Checking Input FASTA.")
logger.info(f"Checking input {filename}.")
if any(fasta):
logger.info("FASTA checked.")
logger.info(f"Input {filename} is in FASTA format.")
else:
logger.error("Error: Input file is not in the FASTA format.")

# check for duplicate headers
check_duplicate_headers(filename)
logger.info(f"All entries in {filename} has unique headers.")

def check_duplicate_headers(fasta_file):
    """
    Check whether any header is duplicated in the input FASTA.

    Added in response to https://github.com/gbouras13/pharokka/issues/293

    Every record of ``fasta_file`` is parsed with ``SeqIO.parse`` and its
    ``description`` is compared against the descriptions already seen. Each
    repeated description is reported through ``logger.error``.

    :param fasta_file: FASTA input handed to ``SeqIO.parse``
    :return: None
    """
    # NOTE(review): logger.error is the only reaction to a duplicate; whether
    # that stops the program depends on how the logger is configured, which
    # is not visible here - confirm.
    # NOTE(review): comparison uses the description rather than record.id -
    # confirm that this is the intended notion of a "header".
    seen_headers = set()

    for record in SeqIO.parse(fasta_file, "fasta"):
        description = record.description
        if description not in seen_headers:
            # first occurrence: remember it and move on
            seen_headers.add(description)
            continue
        logger.error(f"Duplicate header found: {description}")
    # reaching this point without an error means no header was repeated

def validate_gene_predictor(gene_predictor, genbank_flag):
if gene_predictor == "phanotate":
logger.info("Phanotate will be used for gene prediction.")
Expand All @@ -230,7 +250,7 @@ def validate_gene_predictor(gene_predictor, genbank_flag):
)
else:
logger.error(
"Error: gene predictor was incorrectly specified. Please use 'phanotate', 'prodigal', 'prodigal-gv' or 'custom'."
"Error: gene predictor was incorrectly specified. Please use 'phanotate', 'prodigal' or 'prodigal-gv'."
)


Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies:
- aragorn >=1.2.41
- mash >=2.2
- dnaapler >=0.3.0
- pyrodigal >=2.0.1
- pyrodigal >=3.0.0
- pycirclize >=0.3.1
- alive-progress >=3.0.1
- requests >=2.25.1
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def package_files(directory):
"alive-progress>=3.0.1",
"requests>=2.25.1",
"bcbio-gff >=0.7.0",
#"pyrodigal >=3.0.0",
#"pyrodigal_gv >= 0.1.0"
"pyrodigal >=3.0.0",
"pyrodigal_gv >= 0.1.0"
],
)
34 changes: 34 additions & 0 deletions tests/test_data/overall/Standard_examples/dupe_header.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
>MW460250_1
CCCCACCCCGCCATCCCCGATTCATGAGCTATGTTCTAAGTCGATACCATTTAATAAGATAGGGTCATCT
TCTTTACCTACCATATAATCAGATAGTAAGTCTGCTTCAGCTTTTTGCCCTGGTCGTGATAGTTTAGATT
TCTTAGTTTCAATACGCATAATGTGACCATTGTATTAAATAATTAGAATACTATTTTAAAAGATTCTATT
CTGTTTGGATTAATATATACTTGAGGTGAAGTTATAGCACTTTCAGTATATACTTTTATAGAGGTTTCAT
CCATTCCTCTTAACATATAATCTATATCTTGCCTATTGTAACTCTTTTCATCAGTAGATACTAAAAAGTA
TTTAGCTCCACTTGACATTGTTATTTCAATATGTTTTGACATCTACAATCTCTCCTATGCAAATTTGTTA
AAGACAAAGGATAATATAGCTCCTAGAACAAGTAAAAGAACCTTCTCAGTTGTATCCTTTTTCTCAGTAT
CCTTAGTTTTTGTACTTTCAGCAAGTTCTGAAATCTTTTCATCAAGTCTTTCTAATTGGACGTAAATTGC
TGATTGTTTTTCACTATTGACAGCTACATCTTTATCTATACTAACTATCATTTTTCTTAGTTCAGCTACC
TCAACTTCTAAATCTTTGAAAGTTCCTCTATCTATATAATTACCTTCTTGTATCTTAGACTTAATAGTTT
CTACTTGAGAAACAAGGTTGTTTATCTCCTTATCCAACTAGAATCACCTCTAAGGTCTAACCGTTTCAGA
TTCAGAATGGATATCATAATTTTCTAAGAAATCATTGATAATCTCCATATAATTATCCGTAACGACTTTT
CCGTAAGATGTTTTTGTATCAATTTCAAACCTAAGCTTACCAAAACTTTGGAGGTCTAATTCTTTTATTA
CAATATTAGGGTCATCAGAAGGAAGGTAATAATAGTCGAAGTATATAATTGAGCCATTTATTAATACTCT
GTCTATTCTATAGACGTGGAAATAGCGTCTGTCTCTTTTAAAATGGGCTAGTGCATCTTTAAACTCTAAC
TTAAGGATATCCTTATATTTAATCAAAGTGGTAACCTCCTTACTATTAATTTTTAAATTTACTTATTTTG
>MW460250_1
CCCCACCCCGCCATCCCCGATTCATGAGCTATGTTCTAAGTCGATACCATTTAATAAGATAGGGTCATCT
TCTTTACCTACCATATAATCAGATAGTAAGTCTGCTTCAGCTTTTTGCCCTGGTCGTGATAGTTTAGATT
TCTTAGTTTCAATACGCATAATGTGACCATTGTATTAAATAATTAGAATACTATTTTAAAAGATTCTATT
CTGTTTGGATTAATATATACTTGAGGTGAAGTTATAGCACTTTCAGTATATACTTTTATAGAGGTTTCAT
CCATTCCTCTTAACATATAATCTATATCTTGCCTATTGTAACTCTTTTCATCAGTAGATACTAAAAAGTA
TTTAGCTCCACTTGACATTGTTATTTCAATATGTTTTGACATCTACAATCTCTCCTATGCAAATTTGTTA
AAGACAAAGGATAATATAGCTCCTAGAACAAGTAAAAGAACCTTCTCAGTTGTATCCTTTTTCTCAGTAT
CCTTAGTTTTTGTACTTTCAGCAAGTTCTGAAATCTTTTCATCAAGTCTTTCTAATTGGACGTAAATTGC
TGATTGTTTTTCACTATTGACAGCTACATCTTTATCTATACTAACTATCATTTTTCTTAGTTCAGCTACC
TCAACTTCTAAATCTTTGAAAGTTCCTCTATCTATATAATTACCTTCTTGTATCTTAGACTTAATAGTTT
CTACTTGAGAAACAAGGTTGTTTATCTCCTTATCCAACTAGAATCACCTCTAAGGTCTAACCGTTTCAGA
TTCAGAATGGATATCATAATTTTCTAAGAAATCATTGATAATCTCCATATAATTATCCGTAACGACTTTT
CCGTAAGATGTTTTTGTATCAATTTCAAACCTAAGCTTACCAAAACTTTGGAGGTCTAATTCTTTTATTA
CAATATTAGGGTCATCAGAAGGAAGGTAATAATAGTCGAAGTATATAATTGAGCCATTTATTAATACTCT
GTCTATTCTATAGACGTGGAAATAGCGTCTGTCTCTTTTAAAATGGGCTAGTGCATCTTTAAACTCTAAC
TTAAGGATATCCTTATATTTAATCAAAGTGGTAACCTCCTTACTATTAATTTTTAAATTTACTTATTTTG
17 changes: 16 additions & 1 deletion tests/test_overall.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,18 @@ def test_meta(tmp_dir):
cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -m"
exec_command(cmd)

def test_meta_prodigal_gv(tmp_dir):
    """test pharokka meta with prodigal-gv

    Deliberately not named ``test_meta``: that name is already used by the
    plain meta test defined just above in this module. A second
    ``def test_meta`` would rebind the name, so only one of the two tests
    would be collected and run.
    """
    input_fasta: Path = f"{meta_data}/combined_meta.fasta"
    # same invocation as the plain meta test, plus "-g prodigal-gv"
    cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -m -g prodigal-gv"
    exec_command(cmd)

def test_meta_dnaapler_all_bug(tmp_dir):
    """Regression test: pharokka meta with dnaapler and split on combined_meta.fasta

    Runs pharokka.py on the combined meta FASTA with the
    ``-m -s --dnaapler --meta_hmm`` flags via ``exec_command``.
    """
    combined_fasta: Path = f"{meta_data}/combined_meta.fasta"
    exec_command(
        f"pharokka.py -i {combined_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -m -s --dnaapler --meta_hmm"
    )


def test_overall_locus(tmp_dir):
"""test pharokka overall locus tag prefix"""
input_fasta: Path = f"{standard_data}/SAOMS1.fasta"
Expand Down Expand Up @@ -189,6 +193,10 @@ def test_meta_no_cds_contig(tmp_dir):
cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -m"
exec_command(cmd)

######
# pharokka CI was timing out (>6 hours).
# The tests below are covered by other tests anyway,
# so they are left commented out and the suite is run as is.

# def test_meta_hmm(tmp_dir):
# """test pharokka meta hmm"""
Expand Down Expand Up @@ -228,6 +236,13 @@ def test_overall_genbank_meta(tmp_dir):
class testFails(unittest.TestCase):
"""Tests for fails"""

def test_dupe_header(self):
    """pharokka must fail when the input FASTA contains a duplicated header"""
    # dupe_header.fasta holds the same header (>MW460250_1) twice
    dupe_fasta: Path = f"{standard_data}/dupe_header.fasta"
    command = f"pharokka.py -i {dupe_fasta} -d {database_dir} -o {temp_dir} -t 1 -f -m"
    # only the pharokka invocation itself is expected to raise
    with self.assertRaises(RuntimeError):
        exec_command(command)

def test_meta_with_single_contig(self):
"""tests that pharokka exits if single contig is passed to meta"""
with self.assertRaises(RuntimeError):
Expand Down

0 comments on commit 75c07ae

Please sign in to comment.