diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2-foss-2023a-CUDA-12.1.1.eb new file mode 100644 index 000000000000..3fe9e6719f66 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2-foss-2023a-CUDA-12.1.1.eb @@ -0,0 +1,167 @@ +easyblock = 'PythonBundle' + +name = 'AlphaFold' +version = '2.3.2' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://deepmind.com/research/case-studies/alphafold' +description = "AlphaFold can predict protein structures with atomic accuracy even where no similar structure is known" + +toolchain = {'name': 'foss', 'version': '2023a'} + +builddependencies = [ + ('poetry', '1.5.1') +] + +dependencies = [ + ('Python', '3.11.3'), + ('CUDA', '12.1.1', '', SYSTEM), + ('SciPy-bundle', '2023.07'), + ('PyYAML', '6.0'), + ('TensorFlow', '2.13.0'), # doesn't require TF-gpu + ('Biopython', '1.83'), + ('HH-suite', '3.3.0'), + ('HMMER', '3.4'), + ('Kalign', '3.4.0'), + ('jax', '0.4.25', versionsuffix), # also provides absl-py # requirement is ==0.3.25! 
+ ('UCX-CUDA', '1.14.1', versionsuffix), + ('cuDNN', '8.9.2.26', versionsuffix, SYSTEM), + ('NCCL', '2.18.3', versionsuffix), + ('OpenMM', '8.0.0', versionsuffix), + ('dm-tree', '0.1.8'), + ('dm-haiku', '0.0.12', versionsuffix), +] + +# commit to use for downloading stereo_chemical_props.txt and copy to alphafold/common, +# see docker/Dockerfile in AlphaFold repository +local_scp_commit = '7102c6' + +components = [ + ('stereo_chemical_props.txt', local_scp_commit, { + 'easyblock': 'Binary', + 'source_urls': [ + 'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit, + ], + 'sources': [ + { + 'download_filename': 'stereo_chemical_props.txt', + 'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit, + 'extract_cmd': "cp %s ./stereo_chemical_props.txt", + } + ], + 'checksums': [ + '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt + ] + }) +] + +use_pip = True + +exts_list = [ + ('PDBFixer', '1.9', { + 'source_urls': ['https://github.com/openmm/pdbfixer/archive/refs/tags/'], + 'sources': [{'download_filename': '%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + 'checksums': ['88b9a77e50655f89d0eb2075093773e82c27a4cef842cb7d735c877b20cd39fb'], + }), + ('tabulate', '0.9.0', { + 'checksums': ['0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c'], + }), + ('websocket-client', '1.5.1', { + 'modulename': 'websocket', + 'checksums': ['3f09e6d8230892547132177f575a4e3e73cfdf06526e20cc02aa1c3b47184d40'], + }), + ('docker', '7.0.0', { + 'checksums': ['323736fb92cd9418fc5e7133bc953e11a9da04f4483f828b527db553f1e7e5a3'], + }), + ('immutabledict', '4.1.0', { + 'checksums': ['93d100ccd2cd09a1fd3f136b9328c6e59529ba341de8bb499437f6819159fe8a'], + }), + ('contextlib2', '21.6.0', { + 'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'], + }), + ('ml_collections', '0.1.1', { + 'preinstallopts': "touch requirements.txt && touch 
requirements-test.txt && ", + 'checksums': ['3fefcc72ec433aa1e5d32307a3e474bbb67f405be814ea52a2166bfc9dbe68cc'], + }), + (name, version, { + 'patches': [ + 'AlphaFold-2.0.0_fix-packages.patch', + 'AlphaFold-2.3.2_data-dep-paths-shebang-UniRef30.patch', + 'AlphaFold-2.0.0_n-cpu.patch', + 'AlphaFold-2.0.1_setup_rm_tfcpu.patch', + 'AlphaFold-2.3.2_use_openmm_8.0.0.patch', + 'AlphaFold-2.3.2_BioPythonPDBData.patch', + ], + 'source_urls': ['https://github.com/deepmind/alphafold/archive/refs/tags/'], + 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + 'checksums': [ + {'AlphaFold-2.3.2.tar.gz': '4ea8005ba1b573fa1585e4c29b7d188c5cbfa59b4e4761c9f0c15c9db9584a8e'}, + {'AlphaFold-2.0.0_fix-packages.patch': '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db'}, + {'AlphaFold-2.3.2_data-dep-paths-shebang-UniRef30.patch': + '58cd0ce4094afe76909649abe68034c4fbdb500967f5c818f49b530356dc012b'}, + {'AlphaFold-2.0.0_n-cpu.patch': 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04'}, + {'AlphaFold-2.0.1_setup_rm_tfcpu.patch': + '1a2e4e843bd9a4d15ee39e6c37cc63ba281311cc7a0a5610f0e43b52ef93faac'}, + {'AlphaFold-2.3.2_use_openmm_8.0.0.patch': + 'bbef940c0c959040aaf3984ec47777a229c164517b54616a2688d58fae636d84'}, + {'AlphaFold-2.3.2_BioPythonPDBData.patch': + 'e4483a525ae5c4dc5a5f633bed8cf5337c329e64b603ab7b684a9d18cd26a22f'}, + ], + }), +] + +local_pylibdir = '%(installdir)s/lib/python%(pyshortver)s/site-packages' +local_link_scp = 'ln -s %%(installdir)s/stereo_chemical_props.txt %s/alphafold/common' % local_pylibdir + +postinstallcmds = [ + 'cp %(builddir)s/AlphaFold/alphafold-%(version)s/run_alphafold*.py %(installdir)s/bin', + 'cp -rpP %(builddir)s/AlphaFold/alphafold-%(version)s/scripts %(installdir)s', + 'cd %(installdir)s/bin && ln -s run_alphafold.py alphafold', + 'chmod a+x %(installdir)s/bin/run_alphafold.py', + local_link_scp, +] + +sanity_check_paths = { + 'files': ['bin/alphafold', 'bin/pdbfixer', 
'bin/run_alphafold.py', 'stereo_chemical_props.txt'], + 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], +} + +sanity_check_commands = [ + "pdbfixer --help", + "python -m openmm.testInstallation", + "python -c 'import alphafold'", + "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", + "python %(installdir)s/bin/run_alphafold_test.py", +] + +sanity_pip_check = True + +# these allow to make predictions on proteins that would typically be too long to fit into GPU memory; +# see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py +modextravars = { + # these allow to make predictions on proteins that would typically be too long to fit into GPU memory; + # see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py + 'TF_FORCE_UNIFIED_MEMORY': '1', + # jaxlib 0.4.1: https://jax.readthedocs.io/en/latest/changelog.html#jaxlib-0-4-1-dec-13-2022 + # "The behavior of XLA_PYTHON_CLIENT_MEM_FRACTION=.XX has been changed to allocate XX% of the total GPU memory + # instead of the previous behavior of using currently available GPU memory to calculate preallocation. Please refer + # to GPU memory allocation for more details." 
+ # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html + 'XLA_PYTHON_CLIENT_MEM_FRACTION': '2.5', + # + # Download with $EBROOTALPHAFOLD/scripts/download_all_data.sh /path/to/AlphaFold_DBs/$EBVERSIONALPHAFOLD + 'ALPHAFOLD_DATA_DIR': '/path/to/AlphaFold_DBs/%(version)s', # please adapt + # Adapt in order to use a different version of UniRef30 by default, + # e.g., v2023_02 from https://wwwuser.gwdg.de/~compbiol/uniclust/2023_02/UniRef30_2023_02_hhsuite.tar.gz: + 'ALPHAFOLD_UNIREF30_VER': '2021_03', + 'OPENMM_RELAX': 'CUDA' # unset or set to 'CPU' in order not to run the energy minimization on GPU; PR#189 +} + +postinstallmsgs = [ + "A newer version of UniRef30 (2023_02) is available at: " + "https://wwwuser.gwdg.de/~compbiol/uniclust/2023_02/UniRef30_2023_02_hhsuite.tar.gz. " + "Untar to $ALPHAFOLD_DATA_DIR/uniref30/ and set the default version accordingly by changing " + "modextravars:ALPHAFOLD_UNIREF30_VER." +] + +moduleclass = 'bio' diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_BioPythonPDBData.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_BioPythonPDBData.patch new file mode 100644 index 000000000000..df73873cb1ce --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_BioPythonPDBData.patch @@ -0,0 +1,14 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/10 +# BioPython 1.83 does not provide protein_letters_3to1 in Bio.Data.SCOPdata but in Bio.Data.PDBData (and Bio.Data.IUPACData) +diff -ru -ru alphafold-2.3.2/alphafold/data/mmcif_parsing.py alphafold-2.3.2_BioPythonSCOPData/alphafold/data/mmcif_parsing.py +--- alphafold-2.3.2/alphafold/data/mmcif_parsing.py 2024-02-19 09:55:16.359778490 +0100 ++++ alphafold-2.3.2_BioPythonSCOPData/alphafold/data/mmcif_parsing.py 2023-03-27 13:50:49.000000000 +0200 +@@ -21,7 +21,7 @@ + + from absl import logging + from Bio import PDB +-from Bio.Data import SCOPData ++from Bio.Data import PDBData as SCOPData + + # Type aliases: + ChainId = str diff --git 
a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_data-dep-paths-shebang-UniRef30.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_data-dep-paths-shebang-UniRef30.patch new file mode 100644 index 000000000000..c7bbd59ac0c1 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_data-dep-paths-shebang-UniRef30.patch @@ -0,0 +1,164 @@ +pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data +(see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py); +pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild +author: Kenneth Hoste (HPC-UGent) +update 2.0.1 -> 2.1.0/2.1.2/2.3.0/2.3.2: Thomas Hoffmann (EMBL); +uniref30 version env. variable (THEMBL) + +diff -ru alphafold-2.3.2/run_alphafold.py alphafold-2.3.2_data-dep-paths-shebang-UniRef30/run_alphafold.py +--- alphafold-2.3.2/run_alphafold.py 2023-03-27 13:50:49.000000000 +0200 ++++ alphafold-2.3.2_data-dep-paths-shebang-UniRef30/run_alphafold.py 2024-10-11 11:34:06.330278962 +0200 +@@ -1,3 +1,4 @@ ++#!/usr/bin/env python + # Copyright 2021 DeepMind Technologies Limited + # + # Licensed under the Apache License, Version 2.0 (the "License"); +@@ -42,6 +43,48 @@ + import numpy as np + + # Internal import (7716). 
++use_reduced_dbs = any("--db_preset=reduced_dbs" in s for s in sys.argv[1:]) ++use_monomer_preset = not any("--model_preset=multimer" in s for s in sys.argv[1:]) ++ ++data_dir = os.getenv('ALPHAFOLD_DATA_DIR') ++use_gpu_relax = os.getenv('OPENMM_RELAX')=='CUDA' ++uniref30_ver = os.getenv('ALPHAFOLD_UNIREF30_VER') ++if not uniref30_ver: uniref30_ver = '2021_03' ++ ++if data_dir: ++ mgnify_database_path = os.path.join(data_dir, 'mgnify', 'mgy_clusters_2022_05.fa') ++ uniref90_database_path = os.path.join(data_dir, 'uniref90', 'uniref90.fasta') ++ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files') ++ obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') ++ if use_monomer_preset: ++ pdb_seqres_database_path = None ++ uniprot_database_path = None ++ pdb70_database_path = os.path.join(data_dir, 'pdb70', 'pdb70') ++ else: ++ pdb_seqres_database_path = os.path.join(data_dir, 'pdb_seqres', 'pdb_seqres.txt') ++ uniprot_database_path = os.path.join(data_dir, 'uniprot', 'uniprot.fasta') ++ pdb70_database_path = None ++ if use_reduced_dbs: ++ small_bfd_database_path = os.path.join(data_dir, 'small_bfd','bfd-first_non_consensus_sequences.fasta') ++ uniref30_database_path = None ++ bfd_database_path = None ++ else: ++ small_bfd_database_path = None ++ bfd_database_path = os.path.join(data_dir, 'bfd', 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt') ++ uniref30_database_path = os.path.join(data_dir, 'uniref30', 'UniRef30_%s' % uniref30_ver) ++else: ++ sys.stderr.write("$ALPHAFOLD_DATA_DIR is not defined!") ++ uniref90_database_path = None ++ mgnify_database_path = None ++ bfd_database_path = None ++ uniref30_database_path = None ++ pdb70_database_path = None ++ template_mmcif_dir = None ++ obsolete_pdbs_path = None ++ small_bfd_database_path = None ++ uniprot_database_path = None ++ pdb_seqres_database_path = None ++ use_gpu_relax = None + + logging.set_verbosity(logging.INFO) + +@@ -59,7 +102,7 @@ + 'separated by commas. 
All FASTA paths must have a unique basename as the ' + 'basename is used to name the output directories for each prediction.') + +-flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.') ++flags.DEFINE_string('data_dir', data_dir, 'Path to directory of supporting data.') + flags.DEFINE_string('output_dir', None, 'Path to a directory that will ' + 'store the results.') + flags.DEFINE_string('jackhmmer_binary_path', shutil.which('jackhmmer'), +@@ -71,32 +114,32 @@ + flags.DEFINE_string('hmmsearch_binary_path', shutil.which('hmmsearch'), + 'Path to the hmmsearch executable.') + flags.DEFINE_string('hmmbuild_binary_path', shutil.which('hmmbuild'), +- 'Path to the hmmbuild executable.') ++ 'Path to the hmmbuild executable.') + flags.DEFINE_string('kalign_binary_path', shutil.which('kalign'), +- 'Path to the Kalign executable.') +-flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 ' +- 'database for use by JackHMMER.') +-flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify ' +- 'database for use by JackHMMER.') +-flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD ' +- 'database for use by HHblits.') +-flags.DEFINE_string('small_bfd_database_path', None, 'Path to the small ' +- 'version of BFD used with the "reduced_dbs" preset.') +-flags.DEFINE_string('uniref30_database_path', None, 'Path to the UniRef30 ' +- 'database for use by HHblits.') +-flags.DEFINE_string('uniprot_database_path', None, 'Path to the Uniprot ' +- 'database for use by JackHMMer.') +-flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 ' +- 'database for use by HHsearch.') +-flags.DEFINE_string('pdb_seqres_database_path', None, 'Path to the PDB ' +- 'seqres database for use by hmmsearch.') +-flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with ' +- 'template mmCIF structures, each named .cif') ++ 'Path to the Kalign executable.') ++flags.DEFINE_string('uniref90_database_path', 
uniref90_database_path, 'Path to the Uniref90 ' ++ 'database for use by JackHMMER.') ++flags.DEFINE_string('mgnify_database_path', mgnify_database_path, 'Path to the MGnify ' ++ 'database for use by JackHMMER.') ++flags.DEFINE_string('bfd_database_path', bfd_database_path, 'Path to the BFD ' ++ 'database for use by HHblits.') ++flags.DEFINE_string('small_bfd_database_path', small_bfd_database_path, 'Path to the small ' ++ 'version of BFD used with the "reduced_dbs" preset.') ++flags.DEFINE_string('uniref30_database_path', uniref30_database_path, 'Path to the UniRef30 ' ++ 'database for use by HHblits.') ++flags.DEFINE_string('uniprot_database_path', uniprot_database_path, 'Path to the Uniprot ' ++ 'database for use by JackHMMer.') ++flags.DEFINE_string('pdb70_database_path', pdb70_database_path, 'Path to the PDB70 ' ++ 'database for use by HHsearch.') ++flags.DEFINE_string('pdb_seqres_database_path', pdb_seqres_database_path, 'Path to the PDB ' ++ 'seqres database for use by hmmsearch.') ++flags.DEFINE_string('template_mmcif_dir', template_mmcif_dir, 'Path to a directory with ' ++ 'template mmCIF structures, each named .cif') + flags.DEFINE_string('max_template_date', None, 'Maximum template release date ' +- 'to consider. Important if folding historical test sets.') +-flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a ' +- 'mapping from obsolete PDB IDs to the PDB IDs of their ' +- 'replacements.') ++ 'to consider. 
Important if folding historical test sets.') ++flags.DEFINE_string('obsolete_pdbs_path', obsolete_pdbs_path, 'Path to file containing a ' ++ 'mapping from obsolete PDB IDs to the PDB IDs of their ' ++ 'replacements.') + flags.DEFINE_enum('db_preset', 'full_dbs', + ['full_dbs', 'reduced_dbs'], + 'Choose preset MSA database configuration - ' +@@ -137,7 +180,7 @@ + 'distracting stereochemical violations but might help ' + 'in case you are having issues with the relaxation ' + 'stage.') +-flags.DEFINE_boolean('use_gpu_relax', None, 'Whether to relax on GPU. ' ++flags.DEFINE_boolean('use_gpu_relax', use_gpu_relax, 'Whether to relax on GPU. ' + 'Relax on GPU can be much faster than CPU, so it is ' + 'recommended to enable if possible. GPUs must be available' + ' if this setting is enabled.') +@@ -334,6 +377,10 @@ + 'sure it is installed on your system.') + + use_small_bfd = FLAGS.db_preset == 'reduced_dbs' ++ if use_small_bfd and data_dir: ++ bfd_database_path = None ++ uniref30_database_path = None ++ + _check_flag('small_bfd_database_path', 'db_preset', + should_be_set=use_small_bfd) + _check_flag('bfd_database_path', 'db_preset', +@@ -456,13 +503,7 @@ + flags.mark_flags_as_required([ + 'fasta_paths', + 'output_dir', +- 'data_dir', +- 'uniref90_database_path', +- 'mgnify_database_path', +- 'template_mmcif_dir', + 'max_template_date', +- 'obsolete_pdbs_path', +- 'use_gpu_relax', + ]) + + app.run(main) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_use_openmm_8.0.0.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_use_openmm_8.0.0.patch new file mode 100644 index 000000000000..765fdb3c4d65 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.2_use_openmm_8.0.0.patch @@ -0,0 +1,243 @@ +# Add compatibility with OpenMM-8.0.0 +# The patch is based on the recipe from https://github.com/deepmind/alphafold/issues/404 +# Author: maxim-masterov (SURF) (7.7.0) +# update 8.0.0: THEMBL +diff -ru alphafold-2.3.2/alphafold/relax/amber_minimize.py 
alphafold-2.3.2_use_openmm_7.7.0/alphafold/relax/amber_minimize.py +--- alphafold-2.3.2/alphafold/relax/amber_minimize.py 2023-03-27 13:50:49.000000000 +0200 ++++ alphafold-2.3.2_use_openmm_7.7.0/alphafold/relax/amber_minimize.py 2023-04-06 10:38:33.512754033 +0200 +@@ -27,10 +27,10 @@ + import ml_collections + import numpy as np + import jax +-from simtk import openmm +-from simtk import unit +-from simtk.openmm import app as openmm_app +-from simtk.openmm.app.internal.pdbstructure import PdbStructure ++from openmm import * ++from openmm import unit ++from openmm import app as openmm_app ++from openmm.app.internal.pdbstructure import PdbStructure + + + ENERGY = unit.kilocalories_per_mole +@@ -47,7 +47,7 @@ + + + def _add_restraints( +- system: openmm.System, ++ system: System, + reference_pdb: openmm_app.PDBFile, + stiffness: unit.Unit, + rset: str, +diff -ru alphafold-2.3.2/alphafold/relax/cleanup.py alphafold-2.3.2_use_openmm_7.7.0/alphafold/relax/cleanup.py +--- alphafold-2.3.2/alphafold/relax/cleanup.py 2023-03-27 13:50:49.000000000 +0200 ++++ alphafold-2.3.2_use_openmm_7.7.0/alphafold/relax/cleanup.py 2023-04-06 10:39:25.224888763 +0200 +@@ -20,8 +20,8 @@ + import io + + import pdbfixer +-from simtk.openmm import app +-from simtk.openmm.app import element ++from openmm import app ++from openmm.app import element + + + def fix_pdb(pdbfile, alterations_info): +diff -ru alphafold-2.3.2/alphafold/relax/cleanup_test.py alphafold-2.3.2_use_openmm_7.7.0/alphafold/relax/cleanup_test.py +--- alphafold-2.3.2/alphafold/relax/cleanup_test.py 2023-03-27 13:50:49.000000000 +0200 ++++ alphafold-2.3.2_use_openmm_7.7.0/alphafold/relax/cleanup_test.py 2023-04-06 10:39:58.409616942 +0200 +@@ -17,7 +17,7 @@ + + from absl.testing import absltest + from alphafold.relax import cleanup +-from simtk.openmm.app.internal import pdbstructure ++from openmm.app.internal import pdbstructure + + + def _pdb_to_structure(pdb_str): +diff -ru alphafold-2.3.2/docker/Dockerfile 
alphafold-2.3.2_use_openmm_7.7.0/docker/Dockerfile +--- alphafold-2.3.2/docker/Dockerfile 2023-03-27 13:50:49.000000000 +0200 ++++ alphafold-2.3.2_use_openmm_7.7.0/docker/Dockerfile 2023-04-06 10:41:10.315194781 +0200 +@@ -76,7 +76,6 @@ + + # Apply OpenMM patch. + WORKDIR /opt/conda/lib/python3.8/site-packages +-RUN patch -p0 < /app/alphafold/docker/openmm.patch + + # Add SETUID bit to the ldconfig binary so that non-root users can run it. + RUN chmod u+s /sbin/ldconfig.real +diff -ru alphafold-2.3.2/notebooks/AlphaFold.ipynb alphafold-2.3.2_use_openmm_7.7.0/notebooks/AlphaFold.ipynb +--- alphafold-2.3.2/notebooks/AlphaFold.ipynb 2023-03-27 13:50:49.000000000 +0200 ++++ alphafold-2.3.2_use_openmm_7.7.0/notebooks/AlphaFold.ipynb 2023-04-06 10:50:41.351746867 +0200 +@@ -103,16 +103,17 @@ + " %shell rm -rf /opt/conda\n", + " %shell wget -q -P /tmp \\\n", + " https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \\\n", +- " \u0026\u0026 bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \\\n", +- " \u0026\u0026 rm /tmp/Miniconda3-latest-Linux-x86_64.sh\n", ++ " && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \\\n", ++ " && rm /tmp/Miniconda3-latest-Linux-x86_64.sh\n", ++ + " pbar.update(9)\n", + "\n", + " PATH=%env PATH\n", + " %env PATH=/opt/conda/bin:{PATH}\n", + " %shell conda install -qy conda==4.13.0 \\\n", +- " \u0026\u0026 conda install -qy -c conda-forge \\\n", ++ " && conda install -qy -c conda-forge \\\n", + " python=3.9 \\\n", +- " openmm=7.5.1 \\\n", ++ " openmm=8.0.0 \\\n", + " pdbfixer\n", + " pbar.update(80)\n", + "\n", +@@ -164,8 +165,8 @@ + " pbar.update(10)\n", + "\n", + " # Apply OpenMM patch.\n", +- " %shell pushd /opt/conda/lib/python3.9/site-packages/ \u0026\u0026 \\\n", +- " patch -p0 \u003c /content/alphafold/docker/openmm.patch \u0026\u0026 \\\n", ++ " %shell pushd /opt/conda/lib/python3.8/site-packages/ && \\\n", ++ + " popd\n", + "\n", + " # Make sure stereo_chemical_props.txt is in all locations 
where it could be searched for.\n", +@@ -189,9 +190,10 @@ + "\n", + "import jax\n", + "if jax.local_devices()[0].platform == 'tpu':\n", +- " raise RuntimeError('Colab TPU runtime not supported. Change it to GPU via Runtime -\u003e Change Runtime Type -\u003e Hardware accelerator -\u003e GPU.')\n", ++ " raise RuntimeError('Colab TPU runtime not supported. Change it to GPU via Runtime -> Change Runtime Type -> Hardware accelerator -> GPU.')\n", + "elif jax.local_devices()[0].platform == 'cpu':\n", +- " raise RuntimeError('Colab CPU runtime not supported. Change it to GPU via Runtime -\u003e Change Runtime Type -\u003e Hardware accelerator -\u003e GPU.')\n", ++ " raise RuntimeError('Colab CPU runtime not supported. Change it to GPU via Runtime -> Change Runtime Type -> Hardware accelerator -> GPU.')\n", ++ + "else:\n", + " print(f'Running with {jax.local_devices()[0].device_kind} GPU')\n", + "\n", +@@ -211,7 +213,7 @@ + "source": [ + "## Making a prediction\n", + "\n", +- "Please paste the sequence of your protein in the text box below, then run the remaining cells via _Runtime_ \u003e _Run after_. You can also run the cells individually by pressing the _Play_ button on the left.\n", ++ "Please paste the sequence of your protein in the text box below, then run the remaining cells via _Runtime_ > _Run after_. You can also run the cells individually by pressing the _Play_ button on the left.\n", + "\n", + "Note that the search against databases and the actual prediction can take some time, from minutes to hours, depending on the length of the protein and what type of GPU you are allocated by Colab (see FAQ below)." 
+ ] +@@ -306,21 +308,21 @@ + "\n", + "# Check whether total length exceeds limit.\n", + "total_sequence_length = sum([len(seq) for seq in sequences])\n", +- "if total_sequence_length \u003e MAX_LENGTH:\n", ++ "if total_sequence_length > MAX_LENGTH:\n", + " raise ValueError('The total sequence length is too long: '\n", + " f'{total_sequence_length}, while the maximum is '\n", + " f'{MAX_LENGTH}.')\n", + "\n", + "# Check whether we exceed the monomer limit.\n", + "if model_type_to_use == ModelType.MONOMER:\n", +- " if len(sequences[0]) \u003e MAX_MONOMER_MODEL_LENGTH:\n", ++ " if len(sequences[0]) > MAX_MONOMER_MODEL_LENGTH:\n", + " raise ValueError(\n", + " f'Input sequence is too long: {len(sequences[0])} amino acids, while '\n", + " f'the maximum for the monomer model is {MAX_MONOMER_MODEL_LENGTH}. You may '\n", + " 'be able to run this sequence with the multimer model by selecting the '\n", + " 'use_multimer_model_for_monomers checkbox above.')\n", + " \n", +- "if total_sequence_length \u003e MAX_VALIDATED_LENGTH:\n", ++ "if total_sequence_length > MAX_VALIDATED_LENGTH:\n", + " print('WARNING: The accuracy of the system has not been fully validated '\n", + " 'above 3000 residues, and you may experience long running times or '\n", + " f'run out of memory. 
Total sequence length is {total_sequence_length} '\n", +@@ -421,7 +423,7 @@ + "]\n", + "\n", + "# Search UniProt and construct the all_seq features only for heteromers, not homomers.\n", +- "if model_type_to_use == ModelType.MULTIMER and len(set(sequences)) \u003e 1:\n", ++ "if model_type_to_use == ModelType.MULTIMER and len(set(sequences)) > 1:\n", + " MSA_DATABASES.extend([\n", + " # Swiss-Prot and TrEMBL are concatenated together as UniProt.\n", + " {'db_name': 'uniprot',\n", +@@ -455,7 +457,7 @@ + " for sequence_index, sequence in enumerate(sorted(set(sequences)), 1):\n", + " fasta_path = f'target_{sequence_index:02d}.fasta'\n", + " with open(fasta_path, 'wt') as f:\n", +- " f.write(f'\u003equery\\n{sequence}')\n", ++ " f.write(f'>query\\n{sequence}')\n", + " sequence_to_fasta_path[sequence] = fasta_path\n", + "\n", + " # Run the search against chunks of genetic databases (since the genetic\n", +@@ -516,7 +518,7 @@ + " num_templates=0, num_res=len(sequence)))\n", + "\n", + " # Construct the all_seq features only for heteromers, not homomers.\n", +- " if model_type_to_use == ModelType.MULTIMER and len(set(sequences)) \u003e 1:\n", ++ " if model_type_to_use == ModelType.MULTIMER and len(set(sequences)) > 1:\n", + " valid_feats = msa_pairing.MSA_FEATURES + (\n", + " 'msa_species_identifiers',\n", + " )\n", +@@ -680,7 +682,7 @@ + "banded_b_factors = []\n", + "for plddt in plddts[best_model_name]:\n", + " for idx, (min_val, max_val, _) in enumerate(PLDDT_BANDS):\n", +- " if plddt \u003e= min_val and plddt \u003c= max_val:\n", ++ " if plddt >= min_val and plddt <= max_val:\n", + " banded_b_factors.append(idx)\n", + " break\n", + "banded_b_factors = np.array(banded_b_factors)[:, None] * final_atom_mask\n", +@@ -693,14 +695,14 @@ + " f.write(relaxed_pdb)\n", + "\n", + "\n", +- "# --- Visualise the prediction \u0026 confidence ---\n", ++ "# --- Visualise the prediction & confidence ---\n", + "show_sidechains = True\n", + "def plot_plddt_legend():\n", + " \"\"\"Plots the 
legend for pLDDT.\"\"\"\n", +- " thresh = ['Very low (pLDDT \u003c 50)',\n", +- " 'Low (70 \u003e pLDDT \u003e 50)',\n", +- " 'Confident (90 \u003e pLDDT \u003e 70)',\n", +- " 'Very high (pLDDT \u003e 90)']\n", ++ " thresh = ['Very low (pLDDT < 50)',\n", ++ " 'Low (70 > pLDDT > 50)',\n", ++ " 'Confident (90 > pLDDT > 70)',\n", ++ " 'Very high (pLDDT > 90)']\n", + "\n", + " colors = [x[2] for x in PLDDT_BANDS]\n", + "\n", +@@ -816,13 +818,13 @@ + "id": "jeb2z8DIA4om" + }, + "source": [ +- "## FAQ \u0026 Troubleshooting\n", ++ "## FAQ & Troubleshooting\n", + "\n", + "\n", + "* How do I get a predicted protein structure for my protein?\n", + " * Click on the _Connect_ button on the top right to get started.\n", + " * Paste the amino acid sequence of your protein (without any headers) into the “Enter the amino acid sequence to fold”.\n", +- " * Run all cells in the Colab, either by running them individually (with the play button on the left side) or via _Runtime_ \u003e _Run all._ Make sure you run all 5 cells in order.\n", ++ " * Run all cells in the Colab, either by running them individually (with the play button on the left side) or via _Runtime_ > _Run all._ Make sure you run all 5 cells in order.\n", + " * The predicted protein structure will be downloaded once all cells have been executed. 
Note: This can take minutes to hours - see below.\n", + "* How long will this take?\n", + " * Downloading the AlphaFold source code can take up to a few minutes.\n", +@@ -831,8 +833,8 @@ + " * Running AlphaFold and generating the prediction can take minutes to hours, depending on the length of your protein and on which GPU-type Colab has assigned you.\n", + "* My Colab no longer seems to be doing anything, what should I do?\n", + " * Some steps may take minutes to hours to complete.\n", +- " * If nothing happens or if you receive an error message, try restarting your Colab runtime via _Runtime_ \u003e _Restart runtime_.\n", +- " * If this doesn’t help, try resetting your Colab runtime via _Runtime_ \u003e _Factory reset runtime_.\n", ++ " * If nothing happens or if you receive an error message, try restarting your Colab runtime via _Runtime_ > _Restart runtime_.\n", ++ " * If this doesn’t help, try resetting your Colab runtime via _Runtime_ > _Factory reset runtime_.\n", + "* How does this compare to the open-source version of AlphaFold?\n", + " * This Colab version of AlphaFold searches a selected portion of the BFD dataset and currently doesn’t use templates, so its accuracy is reduced in comparison to the full version of AlphaFold that is described in the [AlphaFold paper](https://doi.org/10.1038/s41586-021-03819-2) and [Github repo](https://github.com/deepmind/alphafold/) (the full version is available via the inference script).\n", + "* What is a Colab?\n", +@@ -841,7 +843,7 @@ + " * The resources allocated to your Colab vary. See the [Colab FAQ](https://research.google.com/colaboratory/faq.html) for more details.\n", + " * You can execute the Colab nonetheless.\n", + "* I received an error “Colab CPU runtime not supported” or “No GPU/TPU found”, what do I do?\n", +- " * Colab CPU runtime is not supported. 
Try changing your runtime via _Runtime_ \u003e _Change runtime type_ \u003e _Hardware accelerator_ \u003e _GPU_.\n", ++ " * Colab CPU runtime is not supported. Try changing your runtime via _Runtime_ > _Change runtime type_ > _Hardware accelerator_ > _GPU_.\n", + " * The type of GPU allocated to your Colab varies. See the [Colab FAQ](https://research.google.com/colaboratory/faq.html) for more details.\n", + " * If you receive “Cannot connect to GPU backend”, you can try again later to see if Colab allocates you a GPU.\n", + " * [Colab Pro](https://colab.research.google.com/signup) offers priority access to GPUs.\n", diff --git a/easybuild/easyconfigs/d/dm-haiku/dm-haiku-0.0.12-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/d/dm-haiku/dm-haiku-0.0.12-foss-2023a-CUDA-12.1.1.eb new file mode 100644 index 000000000000..f66f10ec5571 --- /dev/null +++ b/easybuild/easyconfigs/d/dm-haiku/dm-haiku-0.0.12-foss-2023a-CUDA-12.1.1.eb @@ -0,0 +1,50 @@ +# update 0.0.12: Thomas Hoffmann (EMBL) +easyblock = 'PythonBundle' + +name = 'dm-haiku' +version = '0.0.12' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://github.com/deepmind/dm-haiku' +description = """Haiku is a simple neural network library for JAX developed by some of the authors of Sonnet, a neural +network library for TensorFlow.""" + +toolchain = {'name': 'foss', 'version': '2023a'} + +dependencies = [ + ('Python', '3.11.3'), + ('SciPy-bundle', '2023.07'), + ('jax', '0.4.25', versionsuffix), # required by jmp, also provides absl-py + ('PyYAML', '6.0'), + ('CUDA', '12.1.1', '', SYSTEM), + ('tensorstore', '0.1.65'), + ('protobuf-python', '4.24.0'), + ('Optax', '0.2.2', versionsuffix), +] + +use_pip = True + +exts_list = [ + ('jmp', '0.0.4', { + 'checksums': ['5dfeb0fd7c7a9f72a70fff0aab9d0cbfae32a809c02f4037ff3485ceb33e1730'], + }), + ('flax', '0.8.4', { + 'checksums': ['968683f850198e1aa5eb2d9d1e20bead880ef7423c14f042db9d60848cb1c90b'], + }), + ('nest_asyncio', '1.6.0', { + 'checksums': 
['6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe'], + }), + ('orbax_checkpoint', '0.5.18', { + 'modulename': 'orbax.checkpoint', + 'preinstallopts': """sed -i 's/jax >= 0.4.25/&\\*/g' pyproject.toml &&""", + 'checksums': ['29f5d311b412760bd6a2fecab3bdbf75407bc00dc6d0457d19478258ecc8fa6d'], + }), + (name, version, { + 'modulename': 'haiku', + 'checksums': ['ba0b3acf71433156737fe342c486da11727e5e6c9e054245f4f9b8f0b53eb608'], + }), +] + +sanity_pip_check = True + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/t/tensorstore/tensorstore-0.1.65-foss-2023a.eb b/easybuild/easyconfigs/t/tensorstore/tensorstore-0.1.65-foss-2023a.eb new file mode 100644 index 000000000000..5dd271c0d88e --- /dev/null +++ b/easybuild/easyconfigs/t/tensorstore/tensorstore-0.1.65-foss-2023a.eb @@ -0,0 +1,62 @@ +# Thomas Hoffmann, EMBL Heidelberg, structures-it@embl.de, 2024/10 +easyblock = 'PythonBundle' + +name = 'tensorstore' +version = '0.1.65' + +homepage = 'https://github.com/google/tensorstore' +description = """TensorStore is an open-source C++ and Python software library designed for +storage and manipulation of large multi-dimensional arrays.""" + +toolchain = {'name': 'foss', 'version': '2023a'} +builddependencies = [ + ('NASM', '2.16.01'), + ('pybind11', '2.11.1'), + ('Bazel', '6.3.1'), + + + + ('PyYAML', '6.0'), + ('zlib', '1.2.13'), + ('LibTIFF', '4.5.0'), + ('snappy', '1.1.10'), + ('Brotli', '1.0.9'), + ('protobuf', '24.0'), + ('bzip2', '1.0.8'), + ('zstd', '1.5.5'), + ('libwebp', '1.3.1'), + ('nlohmann_json', '3.11.2'), + ('Blosc', '1.21.5'), +] + +dependencies = [ + ('Python', '3.11.3'), + ('SciPy-bundle', '2023.07'), + ('ml_dtypes', '0.3.2'), +] + +use_pip = True + + +local_ts_useebbazel = """sed -i 's/bazel_path =.*/""" +local_ts_useebbazel += """bazel_path = os.path.join(os.getenv("EBROOTBAZEL"),"bin", "bazel")/g'""" +local_ts_useebbazel += " bazelisk.py&& " # TODO: patch? 
+local_ts_version = """sed -i "s/use_scm_version=/version='%(version)s',&/g" setup.py&&""" +local_ts_bzl_exp = """export TENSORSTORE_BAZEL_STARTUP_OPTIONS='--output_user_root %(builddir)s/cache' &&""" +# inject CFLAGS: +local_ts_bzl_exp += """export TENSORSTORE_BAZEL_BUILD_OPTIONS="$(for i in $CFLAGS;do echo --copt=$i; done)" &&""" + + +local_ts_preinstall = local_ts_version + local_ts_useebbazel + local_ts_bzl_exp + +exts_list = [ + (name, version, { + 'installopts': '-v', + 'preinstallopts': local_ts_preinstall, + 'checksums': ['65cbe5a600c32569bb0b9f597ea318cc298a13b42d5fc98168c97bb11f320eae'], + }), +] + +sanity_pip_check = True + +moduleclass = 'lib'