From 6350059de5c21a0bd7f0d19a94be177f0bfff96f Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Wed, 9 Aug 2017 15:55:58 +0100 Subject: [PATCH] Using pytest and running tests in parallel --- README.md | 10 ++- gatkcwlgenerator/tests/test.py | 111 +++++++++++++++------------------ setup.py | 1 + test_requirements.txt | 3 + 4 files changed, 61 insertions(+), 64 deletions(-) create mode 100644 test_requirements.txt diff --git a/README.md b/README.md index 8ac94a2..3535e6a 100644 --- a/README.md +++ b/README.md @@ -82,12 +82,18 @@ cwl-runner cwl_files_3.5/HaplotypeCaller.cwl examples/HaplotypeCaller_inputs.yml ## Tests -To run the tests, add example data to `cwl-example-data` such that `examples/HaplotypeCaller_inputs.yml` will run, then: +First install the test requirements: +``` +pip install -r test_requirements.txt +``` +Then add example data to `cwl-example-data` such that `examples/HaplotypeCaller_inputs.yml` will run, and execute the tests: ```bash -python gatkcwlgenerator/tests/test.py +py.test gatkcwlgenerator/tests/test.py ``` +You can also run the tests in parallel by passing `-n <number of processes>` (provided by `pytest-xdist`) to improve performance. + ## Limitations: - The parameter `annotation` (for example, in [HaplotypeCaller](https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php#--annotation)) is specified to take in a string in the generated CWL file, not an enumeration of all the possible options diff --git a/gatkcwlgenerator/tests/test.py b/gatkcwlgenerator/tests/test.py index 96be991..d3847c1 100644 --- a/gatkcwlgenerator/tests/test.py +++ b/gatkcwlgenerator/tests/test.py @@ -7,9 +7,9 @@ import subprocess import os from os import path -import unittest from multiprocessing import Process import tempfile +import pytest import requests @@ -62,13 +62,13 @@ def __init__(self, stdout, stderr, exitcode): path: {0}/cwl-example-data/chr22_cwl_test.cram out: out.gvcf.gz""".format(base_dir) -def 
run_haplotype_caller(extra_info="",interval=1, filetext=None, expect_failure=False): - return run_tool("HaplotypeCaller", extra_info, interval, filetext, expect_failure) +def run_haplotype_caller(extra_info, version, interval=1, filetext=None, expect_failure=False): + return run_tool("HaplotypeCaller", extra_info, version, interval, filetext, expect_failure) """ Runs the haplotype_caller tool with the specified data """ -def run_tool(toolname, extra_info="",interval=1, filetext=None, expect_failure=False): +def run_tool(toolname, extra_info, version, interval=1, filetext=None, expect_failure=False): if filetext is None: extra_info += "\nintervals: [chr22:10591400-{}]".format(10591400 + interval) filetext = "analysis_type: {}\n".format(toolname) + default_args + "\n" + extra_info @@ -76,53 +76,49 @@ def run_tool(toolname, extra_info="",interval=1, filetext=None, expect_failure=F with tempfile.NamedTemporaryFile() as f: f.write(filetext) f.flush() - return run_command("cwl-runner cwl_files_3.5/cwl/{}.cwl {}".format( + return run_command("cwl-runner cwl_files_{}/cwl/{}.cwl {}".format( + version, toolname, f.name ), expect_failure=expect_failure) # Unit tests -class TestGenerateCWL(unittest.TestCase): - supported_versions = ["3.5", "current"] - def test_get_json_links(self): - for version in self.supported_versions: - json_links = cwl_gen.get_json_links(version) - for link_type, links in json_links.__dict__.items(): - self.assertTrue(links, - "There are no links of type '{}' in gatk version {}".format( - link_type, - version - )) - - def test_no_arguments_in_annotator(self): - # If arguments are in annotator modules, we probably need to add them to the CWL file - for version in self.supported_versions: - for url in cwl_gen.get_json_links(version).annotator_urls: - ann_json = requests.get(url).json() - self.assertFalse(ann_json["arguments"]) +supported_versions = ["3.5", "3.6", "3.7", "3.8"] -# Integration tests +@pytest.mark.parametrize("version", supported_versions) 
+class TestGenerateCWL: + def test_get_json_links(self, version): + json_links = cwl_gen.get_json_links(version) + for link_type, links in json_links.__dict__.items(): + assert links, "There are no links of type '{}' in gatk version {}".format( + link_type, + version + ) -class TestRunsCorrectly(unittest.TestCase): - supported_versions = ["3.5", "current", "4.beta-latest"] - def test_runs(self): - for version in self.supported_versions: - run_command("python2 gatkcwlgenerator/main.py -v {} --dev".format(version)) + def test_no_arguments_in_annotator(self, version): + # If arguments are in annotator modules, we probably need to add them to the CWL file + for url in cwl_gen.get_json_links(version).annotator_urls: + ann_json = requests.get(url).json() + assert not ann_json["arguments"] -class TestGeneratedCWLFiles(unittest.TestCase): - base_cwl_path = path.join(base_dir, "cwl_files_3.5/cwl") +# Integration tests - def is_cwlfile_valid(self, cwl_file): - run_command("cwl-runner --validate " + path.join(self.base_cwl_path, cwl_file)) +@pytest.mark.parametrize("version", supported_versions + ["4.beta-latest"]) +def test_runs(version): + run_command("python2 gatkcwlgenerator/main.py -v {} --dev".format(version)) - @unittest.skip("") - def test_are_cwl_files_valid(self): +@pytest.mark.parametrize("version", supported_versions) +class TestGeneratedCWLFiles: + def get_base_cwl_path(self, version): + return path.join(base_dir, "cwl_files_{}/cwl".format(version)) + + def test_are_cwl_files_valid(self, version): exceptions = [] - for cwl_file in os.listdir(self.base_cwl_path): + for cwl_file in os.listdir(self.get_base_cwl_path(version)): try: print("Validated " + cwl_file) - run_command("cwl-runner --validate " + path.join(self.base_cwl_path, cwl_file)) + run_command("cwl-runner --validate " + path.join(self.get_base_cwl_path(version), cwl_file)) except AssertionError as e: print(e) exceptions.append(e) @@ -130,44 +126,35 @@ def test_are_cwl_files_valid(self): if exceptions: 
raise AssertionError("Not all cwl files are valid:\n" + "\n\n".join(exceptions)) - def test_haplotype_caller(self): + def test_haplotype_caller(self, version): run_command("cwl-runner cwl_files_3.5/cwl/HaplotypeCaller.cwl examples/HaplotypeCaller_inputs.yml") # Test if the haplotype caller accepts all the correct types - def test_boolean_type(self): - self.assertIn("ThreadEfficiencyMonitor", run_haplotype_caller("monitorThreadEfficiency: True").stderr) + def test_boolean_type(self, version): + assert "ThreadEfficiencyMonitor" in run_haplotype_caller("monitorThreadEfficiency: True", version).stderr - def test_integers_type(self): - self.assertIn("42 data thread", run_haplotype_caller("num_threads: 42", expect_failure=True).stderr) + def test_integers_type(self, version): + assert "42 data thread" in run_haplotype_caller("num_threads: 42", version, expect_failure=True).stderr - def test_string_type(self): - self.assertIn("Specified name does not exist in input bam files", - run_haplotype_caller("sample_name: invalid_sample_name", expect_failure=True).stderr) + def test_string_type(self, version): + assert "Specified name does not exist in input bam files" in \ + run_haplotype_caller("sample_name: invalid_sample_name", version, expect_failure=True).stderr - def test_file_type(self): + def test_file_type(self, version): BQSR_arg = """ BQSR: class: File path: {0}/cwl-example-data/chr22_cwl_test.fa """.format(base_dir) - self.assertIn("Bad input: The GATK report has an unknown/unsupported version in the header", - run_haplotype_caller(BQSR_arg, expect_failure=True).stderr) + assert "Bad input: The GATK report has an unknown/unsupported version in the header" in \ + run_haplotype_caller(BQSR_arg, version, expect_failure=True).stderr - def test_enum_type(self): - self.assertIn("Strictness is LENIENT", run_haplotype_caller("validation_strictness: LENIENT").stderr) + def test_enum_type(self, version): + assert "Strictness is LENIENT" in 
run_haplotype_caller("validation_strictness: LENIENT", version).stderr - def test_list_type(self): - run_with_larger_intervals = run_haplotype_caller( + def test_list_type(self, version): + run_with_larger_intervals = run_haplotype_caller(extra_info="", version=version, filetext="analysis_type: HaplotypeCaller\n" + default_args + "\nintervals: [chr22:10591400-10591500, chr22:10591500-10591645]") - self.assertIn("Processing 246 bp from intervals", run_with_larger_intervals.stderr) - -""" -The entry point for testing -""" -def test(): - unittest.main() - -if __name__ == "__main__": - test() \ No newline at end of file + assert "Processing 246 bp from intervals" in run_with_larger_intervals.stderr \ No newline at end of file diff --git a/setup.py b/setup.py index a1a1155..e8903fd 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ def read_markdown(file): version="1.0.0", packages=find_packages(exclude=["tests"]), install_requires=open("requirements.txt", "r").readlines(), + tests_require=open("test_requirements.txt", "r").readlines(), url="https://github.com/wtsi-hgi/gatk-cwl-generator", license="MIT", description="Generates CWL files from the GATK documentation Edit", diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 0000000..9c4cd94 --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,3 @@ +pytest +pytest-xdist +requests_cache \ No newline at end of file