Skip to content

Commit

Permalink
Update Vagrant & easy_install
Browse files Browse the repository at this point in the history
- change easy_install scripts to install libs and docs
  one level up from INSTALLATION_PATH
  - It would be cleaner to re-define INSTALLATION_PATH
    to function like a standard PREFIX (e.g. /usr/local),
    but this would be a bigger change
- Conform dev version to python standard (1.1.0.dev0)
- Use additional resources in vagrant box (cpu, storage)
  - needed for compiling casper
- Vagrantfile now installs all needed software
- document using vagrant
- All (non-skipped) tests pass from the vagrant box
  • Loading branch information
sbliven committed Jun 27, 2020
1 parent bef995b commit ed32613
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 58 deletions.
201 changes: 159 additions & 42 deletions Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,52 +14,39 @@ Vagrant.configure("2") do |config|
# boxes at https://vagrantcloud.com/search.
config.vm.box = "ubuntu/bionic64"

# Disable automatic box update checking. If you disable this, then
# boxes will only be checked for updates when the user runs
# `vagrant box outdated`. This is not recommended.
# config.vm.box_check_update = false

# Create a forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine. In the example below,
# accessing "localhost:8080" will access port 80 on the guest machine.
# NOTE: This will enable public access to the opened port
# config.vm.network "forwarded_port", guest: 80, host: 8080

# Create a forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine and only allow access
# via 127.0.0.1 to disable public access
# config.vm.network "forwarded_port", guest: 80, host: 8080, host_ip: "127.0.0.1"

# Create a private network, which allows host-only access to the machine
# using a specific IP.
# config.vm.network "private_network", ip: "192.168.33.10"

# Create a public network, which generally matched to bridged network.
# Bridged networks make the machine appear as another physical device on
# your network.
# config.vm.network "public_network"
# Increase the number of CPUs available to the box.
# Only the CPU count is customised here; VM memory is left at the box default.
# https://stackoverflow.com/a/37335639/81658
config.vm.provider "virtualbox" do |v|
  # Etc is part of Ruby's standard library. Etc.nprocessors reports the number
  # of online processors on the host, so no OS-specific shell command
  # (sysctl / nproc / wmic) has to be run and parsed.
  require 'etc'

  # Never hand VirtualBox a CPU count below 1, even if detection misbehaves.
  cpus = [Etc.nprocessors, 1].max

  puts "Provisioning VM with #{cpus} CPU"
  v.customize ["modifyvm", :id, "--cpus", cpus]
end

# Need extra disk size for casper compilation
# Requires disksize plugin:
# vagrant plugin install vagrant-disksize
config.disksize.size = '15GB'

# Log in as root
#config.ssh.username = 'root'
#config.ssh.password = 'vagrant'
#config.ssh.insert_key = 'true'

# Share an additional folder to the guest VM. The first argument is
# the path on the host to the actual folder. The second argument is
# the path on the guest to mount the folder. And the optional third
# argument is a set of non-required options.
# config.vm.synced_folder "../data", "/vagrant_data"

# Provider-specific configuration so you can fine-tune various
# backing providers for Vagrant. These expose provider-specific options.
# Example for VirtualBox:
#
# config.vm.provider "virtualbox" do |vb|
# # Display the VirtualBox GUI when booting the machine
# vb.gui = true
#
# # Customize the amount of memory on the VM:
# vb.memory = "1024"
# end
#
# View the documentation for the provider you are using for more
# information on available options.

# Enable provisioning with a shell script. Additional provisioners such as
# Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
# documentation for more information about their specific syntax and use.
Expand All @@ -68,26 +55,156 @@ Vagrant.configure("2") do |config|
# apt-get install -y apache2
# SHELL
# Provision the box with an inline shell script. In order, the script:
#   1. installs system packages with apt-get (Python 3, pip, unzip, a headless
#      Java 8 JRE, cmake) and virtualenv with pip3;
#   2. turns off verbose wget output and removes the Java accessibility
#      properties file;
#   3. sets ACCEPT_ALL=yes in easy_setup/configTRAL_path.cfg (appending the
#      setting when the file has none) and runs ./setupTRAL.sh setup;
#   4. writes ~/.tral/config.ini from the embedded here-document (cat <<END);
#   5. runs ./install_ext_software.sh and installs requirements_dev.txt;
#   6. prints a notice that the machine contains proprietary software.
# `set -e` makes the script stop at the first command that fails.
# NOTE(review): `~` presumably resolves to root's home (docs/vagrant.rst says
# TRAL uses /root/.tral as its data dir) -- confirm the provisioner runs as root.
# Everything between <<-SHELL and SHELL is a string handed to the guest shell,
# so lines starting with '#' in there are shell/INI text, not Ruby comments.
config.vm.provision "shell", inline: <<-SHELL
set -e
# Install dependencies
apt-get update
apt-get -y dist-upgrade
apt-get -y install python3 python3-pip unzip
apt-get -y install python3 python3-pip unzip openjdk-8-jre-headless cmake
pip3 install virtualenv
# reduce wget output during provisioning
echo 'verbose = off' >> ~/.wgetrc
# Assistive tech cause java problems
rm -f /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/accessibility.properties
cd /vagrant/easy_setup
# accept all licenses
sed -i 's/ACCEPT_ALL=no/ACCEPT_ALL=yes/i' configTRAL_path.cfg
if grep -q ACCEPT_ALL configTRAL_path.cfg; then
sed -i 's/ACCEPT_ALL=no/ACCEPT_ALL=yes/i' configTRAL_path.cfg
else
echo ACCEPT_ALL=yes >> configTRAL_path.cfg
fi
# Install TRAL software
./setupTRAL.sh setup
# Config file
cat <<END > ~/.tral/config.ini
###########################################
### Configuration file for TRAL Vagrant ###
###########################################
sequence_type = AA
[sequence]
[[repeat_detection]]
# AA includes all detectors used by default on protein sequence data.
AA = HHrepID, T-REKS, TRUST, XSTREAM
# DNA includes all detectors used by default on protein sequence data.
DNA = Phobos, TRED, T-REKS, TRF, XSTREAM
[[repeat_detector_path]]
# If the executable is in the system path, supply its name. Otherwise, supply the full path to the executable. Details are explained in TRAL's online docs.
PHOBOS = phobos
HHrepID = hhrepid_64
HHrepID_dummyhmm = ~/.tral/data/hhrepid/dummyHMM.hmm
T-REKS = T-REKS
TRED = tred
TRF = trf
TRUST = TRUST
TRUST_substitutionmatrix = ~/.tral/tral_external_software/TRUST_Align/Align/BLOSUM50
XSTREAM = XSTREAM
[hmm]
hmmbuild = hmmbuild
l_effective_max = 50
[filter]
[[basic]]
tag = basic_filter
[[[dict]]]
[[[[pvalue]]]]
func_name = pvalue
score = phylo_gap01
threshold = 0.1
[[[[n_effective]]]]
func_name = attribute
attribute = n_effective
type = min
threshold = 1.9
[repeat]
scoreslist = phylo_gap01, # score (the comma in the end is needed for TRAL)
calc_score = False # is the score calculated?
calc_pvalue = False # is the pvalue calculated?
precision = 10
ginsi = ginsi # integrated in MAFFT
Castor = Castor
[[castor_parameter]]
rate_distribution = constant # either constant or gamma
alfsim = alfsim
[repeat_list]
# Columns to include in repeat list TSV output
# Allowed values:
# - begin: position of the tandem repeats within the sequence,
# - pvalue: statistical significance of the tandem repeats
# - divergence: divergence of the tandem repeat units
# - l_effective: length of the tandem repeat units
# - n_effective: number of tandem repeat units
# - msa_original: multiple sequence alignment
# - score: score corresponding to the value of 'model'
# - repeat_region_length: total length of repeat region
output_characteristics = begin, msa_original, l_effective, n_effective, repeat_region_length, divergence, pvalue
# model for scoring repeats. Supported: entropy, parsimony, pSim, phylo, phylo_gap01, phylo_gap001
model = phylo_gap01
[repeat_score]
evolutionary_model = lg
[[indel]]
indel_rate_per_site = 0.01
ignore_gaps = True
gaps = row_wise
zipf = 1.821
[[optimisation]]
start_min = 0.5
start_max = 1.5
n_iteration = 14
[[K80]]
kappa = 2.59
[[TN93]]
alpha_1 = 0.3
alpha_2 = 0.4
beta = 0.7
[[score_calibration]]
scoreslist=phylo_gap01, # score (the comma at the end is needed)
save_calibration = False
precision = 10
[AA]
standard_chars = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
all_chars = A, B, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z
[[ambiguous_chars]]
B = D,N
O = K,
U = C,
Z = E,Q
X = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
[DNA]
standard_chars = A, C, G, T
all_chars = A, C, G, T, N, X
[[ambiguous_chars]]
N = A, C, G, T
X = A, C, G, T
END
# All external software
./install_ext_software.sh
cd /vagrant
# dev requirements are optional but useful for tests and docs
pip3 install -r requirements_dev.txt
echo
echo "THIS MACHINE CONTAINS PROPRIETARY SOFTWARE."
echo "Please check the licenses before using (e.g. no commercial use permitted)"
SHELL

end
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ Indices and tables
overlap_filtering
workflow
search_hmm

vagrant
32 changes: 32 additions & 0 deletions docs/vagrant.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
.. _vagrant:

Running TRAL using vagrant
==========================

Building
--------

To build a vagrant box, first clone tral from the git repo. Next, run
::

vagrant up
vagrant ssh

This will create and provision the box and then log in to it. The current working directory will be mounted at /vagrant within the box.
Tral should be run as root within the box (it uses /root/.tral as the data dir).
::

sudo su
cd /vagrant


Testing TRAL
------------

The vagrant box is useful for testing tral.
::

vagrant ssh
sudo su
cd /vagrant
pytest
2 changes: 1 addition & 1 deletion easy_setup/configTRAL_path.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ INSTALLATION_PATH=/usr/local/bin
## ATTENTION:
# Please only change these filesystem paths if you know where else the changes have to be adapted

TRAL_PATH=$FILES/tral
TRAL_PATH=$FILES/.tral
TRAL_EXT_SOFTWARE=$TRAL_PATH/tral_external_software

TRAL=$TRAL_PATH/tral # TRAL library
Expand Down
5 changes: 1 addition & 4 deletions easy_setup/install_ext_software/alf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@ if [ ! -d "$TRAL_EXT_SOFTWARE/ALF_standalone" ]; then # test if not already in d
fi

rm -rf "$TRAL_EXT_SOFTWARE/ALF_standalone.tar.gz"
(cd "$TRAL_EXT_SOFTWARE/ALF_standalone" && "$TRAL_EXT_SOFTWARE/ALF_standalone/install.sh" "$INSTALLATION_PATH") # installation of ALF
ln -sf "$INSTALLATION_PATH/bin/alfsim" "$INSTALLATION_PATH"
ln -sf "$INSTALLATION_PATH/bin/alfdarwin.linux64" "$INSTALLATION_PATH"
ln -sf "$INSTALLATION_PATH/bin/alfdarwin" "$INSTALLATION_PATH"
(cd "$TRAL_EXT_SOFTWARE/ALF_standalone" && "$TRAL_EXT_SOFTWARE/ALF_standalone/install.sh" "$INSTALLATION_PATH/..") # installation of ALF

######################
### Uninstall ALF (default paths!)
Expand Down
3 changes: 1 addition & 2 deletions easy_setup/install_ext_software/hmmer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,13 @@ done
{
cd "$TRAL_EXT_SOFTWARE/hmmer-"*
} && {
./configure --prefix "$INSTALLATION_PATH"
./configure --prefix "$INSTALLATION_PATH/.."
make clean
make
# "$INSTALLATION_PATH"/bin make check # run a test suite
make install
} && {
echo "Installation of HMMER done."
ln -s "$INSTALLATION_PATH/bin/hmmbuild" "$INSTALLATION_PATH/hmmbuild"
echo -e "\nhmmbuild is in your path $INSTALLATION_PATH\n"
}
)
Expand Down
3 changes: 1 addition & 2 deletions easy_setup/install_ext_software/mafft.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ if [ ! -d "$TRAL_EXT_SOFTWARE/$mafftVer" ]; then
exit 1
}
tar -xvzf "$TRAL_EXT_SOFTWARE/$mafftVer" -C "$TRAL_EXT_SOFTWARE"
sed -i "s#PREFIX = /usr/local#PREFIX = \"$INSTALLATION_PATH\"#" "$TRAL_EXT_SOFTWARE/$latestVer/core/Makefile" # change default installation path in Makefile
sed -i "s#BINDIR = \$(PREFIX)/bin#BINDIR = \$(PREFIX)#" "$TRAL_EXT_SOFTWARE/$latestVer/core/Makefile"
sed -i "s#PREFIX = /usr/local#PREFIX = \"$INSTALLATION_PATH/..\"#" "$TRAL_EXT_SOFTWARE/$latestVer/core/Makefile" # change default installation path in Makefile

( cd "$TRAL_EXT_SOFTWARE/$latestVer/core/" && make clean && make && make install ) # Installation
rm -rf "$TRAL_EXT_SOFTWARE/"$latestVer""
Expand Down
2 changes: 1 addition & 1 deletion tral/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.0-dev"
__version__ = "1.1.0.dev0"
1 change: 1 addition & 0 deletions tral/repeat/test/repeat_align_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
'RASVLFANE----KYKSALQELE--ELK-QIVPKESLVYFL',
'IGKVYKKLG----QTHLALMNFS--WAM-DLDPKGA----N']


def test_repeat_alignment():
''' Test a realignment using Mafft's ginsi.'''

Expand Down
21 changes: 17 additions & 4 deletions tral/sequence/repeat_detection_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
import sys
import tempfile

from tral import configuration
from tral.sequence import repeat_detection_io
from .. import configuration
from . import repeat_detection_io
from ..paths import CONFIG_DIR, config_file

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -196,10 +197,22 @@ def __init__(self):
"-shuffle": False
}

dummyhmm = os.path.expanduser(REPEAT_DETECTOR_PATH['HHrepID_dummyhmm'])
if not os.path.exists(dummyhmm):
# Special case for the data directory
try:
rel_config = os.path.relpath(dummyhmm, start=CONFIG_DIR)
if not rel_config.startswith(".."):
dummyhmm = config_file(rel_config)
except FileNotFoundError:
pass # not in data; error below
if not os.path.exists(dummyhmm):
raise FileNotFoundError("HHrepID_dummyhmm not found: %s" % dummyhmm)

self.valopts = {
# <file> input query alignment (fasta/a2m/a3m) or HMM file (.hhm)
"-i": None,
"-d": os.path.expanduser(REPEAT_DETECTOR_PATH['HHrepID_dummyhmm']), # <path> dummy hmm database file
"-d": dummyhmm, # <path> dummy hmm database file
"-o": 'hhrepID.o', # <file> write results and multiple sequence alignment to file (default=none)
"-v": 0, # -v: verbose mode (default: show only warnings) ; -v 0: suppress all screen outpu
"-P": None, # <float> max p-value of suboptimal alignments in all search rounds but the last one (def=0.1)
Expand Down Expand Up @@ -654,7 +667,7 @@ def __init__(self):
}

self.valopts = {
"-matrix": REPEAT_DETECTOR_PATH['TRUST_substitutionmatrix'],
"-matrix": os.path.expanduser(REPEAT_DETECTOR_PATH['TRUST_substitutionmatrix']),
"-gapo": "8",
"-gapx": "2",
"-procTotal": "1",
Expand Down
2 changes: 1 addition & 1 deletion tral/tral_configuration/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ sequence_type = AA
TRED = tred
TRF = trf
TRUST = TRUST
TRUST_substitutionmatrix = path/to/TRAL_PATH/tral_external_software/TRUST/Align/BLOSUM50
TRUST_substitutionmatrix = ~/.tral/tral_external_software/TRUST_Align/Align/BLOSUM50
XSTREAM = XSTREAM

[hmm]
Expand Down

0 comments on commit ed32613

Please sign in to comment.