Skip to content

Commit

Permalink
Migrate bcbio-variation to conda shell program
Browse files Browse the repository at this point in the history
Remove requirement for custom install of bcbio-variation, instead
using a shell script installed by conda. Update documentation for
MuTect and GATK installs. Removes last requirements for non-conda
installs.
  • Loading branch information
chapmanb committed Jan 5, 2016
1 parent 35d6871 commit 5da92ae
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 51 deletions.
27 changes: 7 additions & 20 deletions bcbio/provenance/programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@
{"cmd": "qualimap", "args": "-h", "stdout_flag": "QualiMap"},
{"cmd": "vcflib", "has_cl_version": False},
{"cmd": "featurecounts", "args": "-v", "stdout_flag": "featureCounts"}]
_manifest_progs = ["BubbleTree", "cufflinks-binary", "cnvkit", "gatk-framework", "grabix", "htseq",
"lumpy-sv", "manta", "metasv", "phylowgs", "platypus-variant", "rna-star",
"rtg-tools","sambamba-binary", "samblaster", "scalpel", "vardict",
"vardict-java", "vep", "vt", "wham"]
_manifest_progs = ["bcbio-variation", "bioconductor-bubbletree", "cufflinks", "cnvkit", "gatk-framework",
"grabix", "htseq", "lumpy-sv", "manta", "metasv", "oncofuse",
"picard", "phylowgs", "platypus-variant",
"rna-star", "rtg-tools", "sambamba", "samblaster", "scalpel", "snpeff", "vardict",
"vardict-java", "varscan", "variant-effect-predictor", "vt", "wham"]

def _broad_versioner(type):
def get_version(config):
Expand All @@ -47,8 +48,6 @@ def get_version(config):
return ""
if type == "gatk":
return runner.get_gatk_version()
elif type == "picard":
return runner.get_picard_version("ViewSam")
elif type == "mutect":
try:
runner = broad.runner_from_config(config, "mutect")
Expand Down Expand Up @@ -91,21 +90,9 @@ def get_version(config):
return _get_cl_version(kwargs, config)
return get_version

_alt_progs = [{"name": "bcbio_variation",
"version_fn": jar_versioner("bcbio_variation", "bcbio.variation")},
{"name": "gatk", "version_fn": _broad_versioner("gatk")},
_alt_progs = [{"name": "gatk", "version_fn": _broad_versioner("gatk")},
{"name": "mutect",
"version_fn": _broad_versioner("mutect")},
{"name": "picard", "version_fn": _broad_versioner("picard")},
{"name": "snpeff",
"version_fn": java_versioner("snpeff", "snpEff", stdout_flag="snpEff version SnpEff")},
{"name": "varscan",
"version_fn": jar_versioner("varscan", "VarScan")},
{"name": "oncofuse",
"version_fn": jar_versioner("Oncofuse", "Oncofuse")},
{"name": "alientrimmer",
"version_fn": jar_versioner("AlienTrimmer", "AlienTrimmer")}
]
"version_fn": _broad_versioner("mutect")}]

def _parse_from_stdoutflag(stdout, x):
for line in stdout:
Expand Down
6 changes: 2 additions & 4 deletions bcbio/variation/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,11 @@ def _bcbio_variation_ensemble(vrn_files, out_file, ref_file, config_file, base_d
"""
vrn_files = [_handle_somatic_ensemble(v, data) for v in vrn_files]
tmp_dir = utils.safe_makedir(os.path.join(base_dir, "tmp"))
bv_jar = config_utils.get_jar("bcbio.variation",
config_utils.get_program("bcbio_variation", data["config"], "dir"))
resources = config_utils.get_resources("bcbio_variation", data["config"])
jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"])
java_args = ["-Djava.io.tmpdir=%s" % tmp_dir]
cmd = ["java"] + jvm_opts + java_args + ["-jar", bv_jar, "variant-ensemble", config_file,
ref_file, out_file] + vrn_files
cmd = ["bcbio-variation"] + jvm_opts + java_args + \
["variant-ensemble", config_file, ref_file, out_file] + vrn_files
with utils.chdir(base_dir):
do.run(cmd, "Ensemble calling: %s" % os.path.basename(base_dir))

Expand Down
7 changes: 2 additions & 5 deletions bcbio/variation/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,10 @@ def bcbio_variation_comparison(config_file, base_dir, data):
"""Run a variant comparison using the bcbio.variation toolkit, given an input configuration.
"""
tmp_dir = utils.safe_makedir(os.path.join(base_dir, "tmp"))
bv_jar = config_utils.get_jar("bcbio.variation",
config_utils.get_program("bcbio_variation",
data["config"], "dir"))
resources = config_utils.get_resources("bcbio_variation", data["config"])
jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"])
cmd = ["java"] + jvm_opts + broad.get_default_jvm_opts(tmp_dir) + \
["-jar", bv_jar, "variant-compare", config_file]
cmd = ["bcbio-variation"] + jvm_opts + broad.get_default_jvm_opts(tmp_dir) + \
["variant-compare", config_file]
do.run(cmd, "Comparing variant calls using bcbio.variation", data)

def _create_validate_config_file(vrn_file, rm_file, rm_interval_file,
Expand Down
6 changes: 2 additions & 4 deletions bcbio/variation/vfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,11 @@ def _freebayes_custom(in_file, ref_file, data):
out_file = "%s-filter%s" % os.path.splitext(in_file)
if not utils.file_exists(out_file):
tmp_dir = utils.safe_makedir(os.path.join(os.path.dirname(in_file), "tmp"))
bv_jar = config_utils.get_jar("bcbio.variation",
config_utils.get_program("bcbio_variation", config, "dir"))
resources = config_utils.get_resources("bcbio_variation", config)
jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"])
java_args = ["-Djava.io.tmpdir=%s" % tmp_dir]
cmd = ["java"] + jvm_opts + java_args + ["-jar", bv_jar, "variant-filter", "freebayes",
in_file, ref_file]
cmd = ["bcbio-variation"] + jvm_opts + java_args + \
["variant-filter", "freebayes", in_file, ref_file]
do.run(cmd, "Custom FreeBayes filtering using bcbio.variation")
return out_file

Expand Down
8 changes: 0 additions & 8 deletions config/bcbio_system.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
# value is the maximum cores that should be allocated for a program.
# - jvm_opts: specify details
# - cmd: Command to launch the program, if not located on PATH.
# - dir: Directory containing program associated data. Especially useful for
# java jars
resources:
# default options, used if other items below are not present
# avoids needing to configure/adjust for every program
Expand Down Expand Up @@ -49,32 +47,26 @@ resources:
jvm_opts: ["-Xms750m", "-Xmx6g"]
gatk:
jvm_opts: ["-Xms500m", "-Xmx3500m"]
dir: /usr/local/share/java/gatk
gatk-haplotype:
jvm_opts: ["-Xms500m", "-Xmx3500m"]
gatk-vqsr:
jvm_opts: ["-Xms500m", "-Xmx3500m"]
picard:
jvm_opts: ["-Xms750m", "-Xmx3500m"]
dir: /usr/local/share/java/picard
snpeff:
jvm_opts: ["-Xms750m", "-Xmx6g"]
bcbio_variation:
jvm_opts: ["-Xms750m", "-Xmx2500m"]
dir: /usr/local/share/java/bcbio_variation
mutect:
jvm_opts: ["-Xms750m", "-Xmx2500m"]
dir: /usr/local/share/java/mutect
miraligner:
jvm_opts: ["-Xms750m", "-Xmx4500m"]
varscan:
jvm_opts: ["-Xms750m", "-Xmx2000m"]
dir: /usr/local/share/java/varscan
vardict:
jvm_opts: ["-Xms750m", "-Xmx3000m"]
oncofuse:
jvm_opts: ["-Xms750m", "-Xmx2000m"]
dir: /usr/local/share/java/oncofuse
express:
memory: 8g
dexseq:
Expand Down
22 changes: 12 additions & 10 deletions docs/contents/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,24 +158,26 @@ GATK and muTect

Calling variants with GATK's HaplotypeCaller or UnifiedGenotyper requires manual
installation of the latest GATK release. This is freely available for academic
users, but requires a manual download from the `GATK download`_ site. Appistry
provides `a distribution of GATK for commercial users`_. If you don't want to use the restricted
GATK version, freely available callers like FreeBayes provide a better
alternative than using older GATK versions. See the `FreeBayes and GATK comparison`_
for a full evaluation.
users, but requires a `license for commercial use
<https://www.broadinstitute.org/gatk/about/#licensing>`_. It is not freely
redistributable so requires a manual download from the `GATK download`_ site. If
you don't want to use the restricted GATK version, freely available callers like
FreeBayes provide a better alternative than using older GATK versions. See the
`FreeBayes and GATK comparison`_ for a full evaluation.

To install GATK, download and unzip the latest version from the GATK or Appistry
distributions. Then make this jar available to bcbio-nextgen with::
To install GATK, download and unzip the latest version from the GATK
distribution. Then make this jar available to bcbio-nextgen with::

bcbio_nextgen.py upgrade --tools --toolplus gatk=/path/to/gatk/GenomeAnalysisTK.jar

This will copy the jar and update your bcbio_system.yaml and manifest files to
reflect the new version.

For muTect, we provide the latest 1.1.5 jar, but commercial users need to obtain
the Appistry muTect distribution. To make this jar available to bcbio-nextgen::
MuTect has similar licensing terms and also requires a license for commercial
use. After `downloading the MuTect jar
<https://www.broadinstitute.org/gatk/download/>`_, make it available to bcbio::

bcbio_nextgen.py upgrade --tools --toolplus mutect=/path/to/appistry/muTect-1.1.5.jar
bcbio_nextgen.py upgrade --tools --toolplus mutect=/path/to/mutect/mutect-1.1.7.jar

Note that muTect does not provide an easy way to query for the current version,
so your input jar needs to include the version in the name.
Expand Down

0 comments on commit 5da92ae

Please sign in to comment.