From 5414691af2d4f642d0ff2c10a91a3a2ac0a9d922 Mon Sep 17 00:00:00 2001 From: Mingye Wang Date: Thu, 30 Jan 2020 16:00:44 +0800 Subject: [PATCH 1/5] run_shell_command: Try to find bash on non-POSIX --- augur/utils.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/augur/utils.py b/augur/utils.py index 310249b5d..6b6b9e2e8 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -4,6 +4,7 @@ import os, json, sys import pandas as pd import subprocess +import shlex from treetime.utils import numeric_date from collections import defaultdict from pkg_resources import resource_stream @@ -531,6 +532,7 @@ def write_VCF_translation(prot_dict, vcf_file_name, ref_file_name): call = ["gzip", vcf_file_name[:-3]] run_shell_command(" ".join(call), raise_errors = True) +shquote = shlex.quote def run_shell_command(cmd, raise_errors = False, extra_env = None): """ @@ -544,21 +546,30 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): overlayed onto the default subprocess environment. """ env = os.environ.copy() + out = '' if extra_env: env.update(extra_env) try: + shargs = ['-c', "set -euo pipefail; " + cmd] + + if os.name == 'posix': + shellexec = ['/bin/bash'] + else: + # We try best effort on other systems. For now that means nt/java. + shellexec = ['env', 'bash'] + # Use check_call() instead of run() since the latter was added only in Python 3.5. - subprocess.check_call( - "set -euo pipefail; " + cmd, - shell = True, - executable = "/bin/bash", - env = env) + out = subprocess.check_output( + shellexec + shargs, + shell = False, + stderr=subprocess.STDOUT) except subprocess.CalledProcessError as error: print_error( - "shell exited {rc} when running: {cmd}{extra}", + "{out}\nshell exited {rc} when running: {cmd}{extra}", + out = out, rc = error.returncode, cmd = cmd, extra = "\nAre you sure this program is installed?" if error.returncode==127 else "", @@ -571,11 +582,13 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): except FileNotFoundError as error: print_error( """ + {out} Unable to run shell commands using {shell}! Augur requires {shell} to be installed. Please open an issue on GitHub if you need assistance. """, + out = out, shell = error.filename ) if raise_errors: From 1b2873905cc114c9ac0cb8526fec4dcead436d4f Mon Sep 17 00:00:00 2001 From: Mingye Wang Date: Thu, 30 Jan 2020 16:07:00 +0800 Subject: [PATCH 2/5] shell-quote all user-defined filenames --- augur/align.py | 6 ++++-- augur/filter.py | 6 +++--- augur/mask.py | 4 ++-- augur/tree.py | 10 +++++----- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/augur/align.py b/augur/align.py index baf4f789d..e2e880943 100644 --- a/augur/align.py +++ b/augur/align.py @@ -6,7 +6,7 @@ from shutil import copyfile import numpy as np from Bio import AlignIO, SeqIO, Seq -from .utils import run_shell_command, nthreads_value +from .utils import run_shell_command, nthreads_value, shquote def make_gaps_ambiguous(aln): ''' @@ -94,7 +94,9 @@ def run(args): # align if args.method=='mafft': - cmd = "mafft --reorder --anysymbol --thread %d %s 1> %s 2> %s.log"%(args.nthreads, seq_fname, output, output) + shoutput = shquote(output) + shname = shquote(seq_fname) + cmd = "mafft --reorder --anysymbol --thread %d -- %s 1> %s 2> %s.log"%(args.nthreads, shname, shoutput, shoutput) print("\nusing mafft to align via:\n\t" + cmd + " \n\n\tKatoh et al, Nucleic Acid Research, vol 30, issue 14" "\n\thttps://doi.org/10.1093%2Fnar%2Fgkf436\n") diff --git a/augur/filter.py b/augur/filter.py index 186622669..40961511d 100644 --- a/augur/filter.py +++ b/augur/filter.py @@ -8,7 +8,7 @@ import random, os, re import numpy as np import sys -from .utils import read_metadata, get_numerical_dates, run_shell_command +from .utils import read_metadata, get_numerical_dates, run_shell_command, shquote comment_char = '#' @@ -30,8 +30,8 @@ def write_vcf(compressed, input_file, output_file, dropped_samps): inCall = "--gzvcf" if compressed else "--vcf" outCall = "| gzip -c" if output_file.lower().endswith('.gz') else "" - toDrop = " ".join(["--remove-indv "+s for s in dropped_samps]) - call = ["vcftools", toDrop, inCall, input_file, "--recode --stdout", outCall, ">", output_file] + toDrop = " ".join(["--remove-indv "+shquote(s) for s in dropped_samps]) + call = ["vcftools", toDrop, inCall, shquote(input_file), "--recode --stdout", outCall, ">", shquote(output_file)] print("Filtering samples using VCFTools with the call:") print(" ".join(call)) diff --git a/augur/mask.py b/augur/mask.py index 15d3416a8..b0f57e077 100644 --- a/augur/mask.py +++ b/augur/mask.py @@ -6,7 +6,7 @@ import pandas as pd import os import numpy as np -from .utils import run_shell_command +from .utils import run_shell_command, shquote def get_mask_sites(vcf_file, mask_file): ''' @@ -99,7 +99,7 @@ def run(args): in_file = args.sequences+"_temp" copyfile(args.sequences, in_file) - call = ["vcftools", "--exclude-positions", tempMaskFile, inCall, in_file, "--recode --stdout", outCall, ">", out_file] + call = ["vcftools", "--exclude-positions", shquote(tempMaskFile), inCall, shquote(in_file), "--recode --stdout", outCall, ">", shquote(out_file)] print("Removing masked sites from VCF file using vcftools... this may take some time. Call:") print(" ".join(call)) run_shell_command(" ".join(call), raise_errors = True) diff --git a/augur/tree.py b/augur/tree.py index 2f4e274c9..a26c30448 100644 --- a/augur/tree.py +++ b/augur/tree.py @@ -13,7 +13,7 @@ from treetime.vcf_utils import read_vcf from pathlib import Path -from .utils import run_shell_command, nthreads_value +from .utils import run_shell_command, nthreads_value, shquote def find_executable(names, default = None): """ @@ -58,7 +58,7 @@ def build_raxml(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_args # RAxML_bestTree.4ed91a, RAxML_info.4ed91a, RAxML_parsimonyTree.4ed91a, RAxML_result.4ed91a random_string = uuid.uuid4().hex[0:6] - call = [raxml,"-T",str(nthreads)," -f d -m GTRCAT -c 25 -p 235813 -n %s -s"%(random_string), aln_file, tree_builder_args, "> RAxML_log.%s"%(random_string)] + call = [raxml,"-T",str(nthreads)," -f d -m GTRCAT -c 25 -p 235813 -n %s -s"%(random_string), shquote(aln_file), tree_builder_args, "> RAxML_log.%s"%(random_string)] cmd = " ".join(call) print("Building a tree via:\n\t" + cmd + "\n\tStamatakis, A: RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies." @@ -110,7 +110,7 @@ def build_fasttree(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_a "OMP_NUM_THREADS": str(nthreads), } - call = [fasttree, "-nosupport", "-nt", aln_file, tree_builder_args, "1>", out_file, "2>", log_file] + call = [fasttree, "-nosupport", "-nt", shquote(aln_file), tree_builder_args, "1>", shquote(out_file), "2>", shquote(log_file)] cmd = " ".join(call) print("Building a tree via:\n\t" + cmd + "\n\tPrice et al: FastTree 2 - Approximately Maximum-Likelihood Trees for Large Alignments." + @@ -164,10 +164,10 @@ def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nt ] if substitution_model.lower() != "none": - call = ["iqtree", *fast_opts, "-nt", str(nthreads), "-s", tmp_aln_file, + call = ["iqtree", *fast_opts, "-nt", str(nthreads), "-s", shquote(tmp_aln_file), "-m", substitution_model, tree_builder_args, ">", log_file] else: - call = ["iqtree", *fast_opts, "-nt", str(nthreads), "-s", tmp_aln_file, tree_builder_args, ">", log_file] + call = ["iqtree", *fast_opts, "-nt", str(nthreads), "-s", shquote(tmp_aln_file), tree_builder_args, ">", shquote(log_file)] cmd = " ".join(call) From da5f16291ee660b6de14f6d2e571c791778bae53 Mon Sep 17 00:00:00 2001 From: Mingye Wang Date: Fri, 31 Jan 2020 02:43:28 +0800 Subject: [PATCH 3/5] align: no -- for mafft --- augur/align.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/align.py b/augur/align.py index e2e880943..18fab1e6a 100644 --- a/augur/align.py +++ b/augur/align.py @@ -96,7 +96,7 @@ def run(args): if args.method=='mafft': shoutput = shquote(output) shname = shquote(seq_fname) - cmd = "mafft --reorder --anysymbol --thread %d -- %s 1> %s 2> %s.log"%(args.nthreads, shname, shoutput, shoutput) + cmd = "mafft --reorder --anysymbol --thread %d %s 1> %s 2> %s.log"%(args.nthreads, shname, shoutput, shoutput) print("\nusing mafft to align via:\n\t" + cmd + " \n\n\tKatoh et al, Nucleic Acid Research, vol 30, issue 14" "\n\thttps://doi.org/10.1093%2Fnar%2Fgkf436\n") From ef904adfa6778f845d57c873e2a2d95a86534a58 Mon Sep 17 00:00:00 2001 From: Mingye Wang Date: Fri, 31 Jan 2020 18:11:57 +0800 Subject: [PATCH 4/5] Check_output is for the error only --- augur/utils.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/augur/utils.py b/augur/utils.py index 6b6b9e2e8..c2b057081 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -546,7 +546,6 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): overlayed onto the default subprocess environment. """ env = os.environ.copy() - out = '' if extra_env: env.update(extra_env) @@ -561,15 +560,16 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): shellexec = ['env', 'bash'] # Use check_call() instead of run() since the latter was added only in Python 3.5. - out = subprocess.check_output( + subprocess.check_output( shellexec + shargs, shell = False, - stderr=subprocess.STDOUT) + stderr = subprocess.STDOUT, + env = env) except subprocess.CalledProcessError as error: print_error( "{out}\nshell exited {rc} when running: {cmd}{extra}", - out = out, + out = error.output, rc = error.returncode, cmd = cmd, extra = "\nAre you sure this program is installed?" if error.returncode==127 else "", @@ -582,13 +582,11 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): except FileNotFoundError as error: print_error( """ - {out} Unable to run shell commands using {shell}! Augur requires {shell} to be installed. Please open an issue on GitHub if you need assistance. """, - out = out, shell = error.filename ) if raise_errors: From 6332017d916350b15f35f6f0429ed775a8a113b7 Mon Sep 17 00:00:00 2001 From: Mingye Wang Date: Fri, 31 Jan 2020 18:26:45 +0800 Subject: [PATCH 5/5] run_shell_command: don't rely on error.filename (missing on win32) --- augur/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/augur/utils.py b/augur/utils.py index c2b057081..84e12752b 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -550,15 +550,15 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): if extra_env: env.update(extra_env) - try: - shargs = ['-c', "set -euo pipefail; " + cmd] + shargs = ['-c', "set -euo pipefail; " + cmd] - if os.name == 'posix': - shellexec = ['/bin/bash'] - else: - # We try best effort on other systems. For now that means nt/java. - shellexec = ['env', 'bash'] + if os.name == 'posix': + shellexec = ['/bin/bash'] + else: + # We try best effort on other systems. For now that means nt/java. + shellexec = ['env', 'bash'] + try: # Use check_call() instead of run() since the latter was added only in Python 3.5. subprocess.check_output( shellexec + shargs, @@ -587,7 +587,7 @@ def run_shell_command(cmd, raise_errors = False, extra_env = None): Augur requires {shell} to be installed. Please open an issue on GitHub if you need assistance. """, - shell = error.filename + shell = ' and '.join(shellexec) ) if raise_errors: raise