Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tree: RAxML bootstrapping #1127

Merged
merged 6 commits into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
### Features

* titers: Support parsing of thresholded values (e.g., "<80" or ">2560"). [#1118][] (@huddlej)
* tree: Support bootstrapped trees generated with RAxML via user-provided `--tree-builder-args`. [#1127][] (@tsibley)

### Bug Fixes

* utils: Serialize common numpy data types in `write_json`. [#1119][] (@victorlin)
* tree: Suppress the `Cannot specify --substitution-model unless using IQTree` warning when `--substitution-model` is left at its default. [#1127][] (@tsibley)
* tree: Print the underlying error message when tree building fails. [#1127][] (@tsibley)

[#1118]: https://github.com/nextstrain/augur/pull/1118
[#1119]: https://github.com/nextstrain/augur/pull/1119
[#1127]: https://github.com/nextstrain/augur/pull/1127

## 19.2.0 (19 December 2022)

Expand Down
50 changes: 35 additions & 15 deletions augur/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from treetime.vcf_utils import read_vcf
from pathlib import Path

from .errors import AugurError
from .io.sequences import read_sequences
from .io.shell_command_runner import run_shell_command
from .io.vcf import shquote
Expand All @@ -40,6 +41,9 @@
"iqtree": "-ninit 2 -n 2 -me 0.05 -nt AUTO -redo",
}

# IQ-TREE only; see usage below
DEFAULT_SUBSTITUTION_MODEL = "GTR"

class ConflictingArgumentsException(Exception):
"""Exception when user-provided tree builder arguments conflict with the
requested tree builder's hardcoded defaults (e.g., the path to the
Expand Down Expand Up @@ -108,7 +112,7 @@ def find_executable(names, default = None):

def build_raxml(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_args=None):
'''
build tree using RAxML with parameters '-f d -m GTRCAT -c 25 -p 235813 -n tre"
build tree using RAxML
'''

raxml = find_executable([
Expand Down Expand Up @@ -142,16 +146,30 @@ def build_raxml(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_args
"\n\tIn Bioinformatics, 2014\n")
try:
run_shell_command(cmd, raise_errors = True)
shutil.copy("RAxML_bestTree.%s"%(random_string), out_file)

# When bootstrapping is enabled by custom tree_builder_args, RAxML
# outputs the tree with support values to a different file,
# "bipartitions". If it exists, then use it; otherwise use "bestTree".
possible_tree_files = [
Path(f"RAxML_bipartitions.{random_string}"), # best-scoring ML tree with support values
Path(f"RAxML_bestTree.{random_string}"), # best-scoring ML tree
]

tree = next((t for t in possible_tree_files if t.exists()), None)

if not tree:
raise AugurError(f"No RAxML output tree files found; looked for: {', '.join(map(repr, map(str, possible_tree_files)))}")

shutil.copy(str(tree), out_file)
T = Phylo.read(out_file, 'newick')

if clean_up:
os.remove("RAxML_bestTree.%s"%(random_string))
os.remove("RAxML_info.%s"%(random_string))
os.remove("RAxML_parsimonyTree.%s"%(random_string))
os.remove("RAxML_result.%s"%(random_string))
for f in Path().glob(f"RAxML_*.{random_string}"):
f.unlink()

except:
except Exception as error:
print("ERROR: TREE BUILDING FAILED")
print(f"ERROR: {error}")
if os.path.isfile("RAxML_log.%s"%(random_string)):
print("Please see the log file for more details: {}".format("RAxML_log.%s"%(random_string)))
T=None
Expand All @@ -161,7 +179,7 @@ def build_raxml(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_args

def build_fasttree(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_args=None):
'''
build tree using fasttree with parameters "-nt"
build tree using fasttree
'''
log_file = out_file + ".log"

Expand Down Expand Up @@ -195,8 +213,9 @@ def build_fasttree(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_a
try:
run_shell_command(cmd, raise_errors = True, extra_env = extra_env)
T = Phylo.read(out_file, 'newick')
except:
except Exception as error:
print("ERROR: TREE BUILDING FAILED")
print(f"ERROR: {error}")
if os.path.isfile(log_file):
print("Please see the log file for more details: {}".format(log_file))
T=None
Expand All @@ -206,7 +225,7 @@ def build_fasttree(aln_file, out_file, clean_up=True, nthreads=1, tree_builder_a

def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nthreads=1, tree_builder_args=None):
'''
build tree using IQ-Tree with parameters "-fast"
build tree using IQ-Tree
arguments:
aln_file file name of input aligment
out_file file name to write tree to
Expand Down Expand Up @@ -273,8 +292,9 @@ def random_string(n):
for ext in [".bionj",".ckp.gz",".iqtree",".mldist",".model.gz",".treefile",".uniqueseq.phy",".model"]:
if os.path.isfile(tmp_aln_file + ext):
os.remove(tmp_aln_file + ext)
except:
except Exception as error:
print("ERROR: TREE BUILDING FAILED")
print(f"ERROR: {error}")
if os.path.isfile(log_file):
print("Please see the log file for more details: {}".format(log_file))
T=None
Expand Down Expand Up @@ -396,8 +416,8 @@ def register_parser(parent_subparsers):
parser.add_argument('--alignment', '-a', required=True, help="alignment in fasta or VCF format")
parser.add_argument('--method', default='iqtree', choices=["fasttree", "raxml", "iqtree"], help="tree builder to use")
parser.add_argument('--output', '-o', type=str, help='file name to write tree to')
parser.add_argument('--substitution-model', default="GTR",
help='substitution model to use. Specify \'auto\' to run ModelTest. Currently, only available for IQTREE.')
parser.add_argument('--substitution-model', default=DEFAULT_SUBSTITUTION_MODEL,
help='substitution model to use. Specify \'auto\' to run ModelTest. Currently, only available for IQ-TREE.')
parser.add_argument('--nthreads', type=nthreads_value, default=1,
help="maximum number of threads to use; specifying the value 'auto' will cause the number of available CPU cores on your system, if determinable, to be used")
parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the VCF was mapped to')
Expand Down Expand Up @@ -451,8 +471,8 @@ def run(args):
else:
fasta = aln

if args.substitution_model and not args.method=='iqtree':
print("Cannot specify model unless using IQTree. Model specification ignored.")
if args.method != "iqtree" and args.substitution_model is not DEFAULT_SUBSTITUTION_MODEL:
print(f"Cannot specify --substitution-model unless using IQ-TREE. Model specification {args.substitution_model!r} ignored.", file=sys.stderr)

# Allow users to keep default args, override them, or augment them.
if args.tree_builder_args is None:
Expand Down