Skip to content

Commit

Permalink
docker package dev environment
Browse files Browse the repository at this point in the history
  • Loading branch information
matteodelucchi committed May 12, 2020
1 parent ffa18a8 commit 48b82ec
Show file tree
Hide file tree
Showing 31 changed files with 1,565 additions and 0 deletions.
102 changes: 102 additions & 0 deletions docker/.tral/config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Sequence type selected for this configuration. AA and DNA are the two types
# that have detector lists and alphabet sections in this file.
sequence_type = AA

# Tandem repeat detectors to run per sequence type, and where to find them.
[sequence]
[[repeat_detection]]
# AA includes all detectors used by default on protein sequence data.
AA = HHrepID, T-REKS, TRUST, XSTREAM
# DNA includes all detectors used by default on DNA sequence data.
DNA = Phobos, TRED, T-REKS, TRF, XSTREAM
[[repeat_detector_path]]
# If the executable is in the system path, supply its name. Otherwise, supply the full path to the executable. Details are explained in TRAL's online docs.
# NOTE(review): the DNA detector list above spells "Phobos" while the key here is "PHOBOS" - confirm the lookup is case-insensitive.
PHOBOS = phobos
HHrepID = hhrepid_64
# NOTE(review): HHrepID_dummyhmm and TRUST_substitutionmatrix are absolute paths to data files, not executable names; presumably they match the Docker image layout - confirm.
HHrepID_dummyhmm = /.tral/data/hhrepid/dummyHMM.hmm
T-REKS = T-REKS
TRED = tred
TRF = trf
TRUST = TRUST
TRUST_substitutionmatrix = /tral/tral_external_software/TRUST_Align/Align/BLOSUM50
XSTREAM = XSTREAM

# Settings for HMM construction.
[hmm]
# Name of the hmmbuild executable (or presumably its full path if it is not on the system path).
hmmbuild = hmmbuild
# NOTE(review): presumably an upper bound on l_effective (repeat unit length) for HMM-related steps - confirm in TRAL's docs.
l_effective_max = 50

# Filter definitions. The "basic" filter below (tag: basic_filter) lists two criteria:
#   - pvalue:      p-value of the phylo_gap01 score, threshold 0.1
#   - n_effective: the n_effective attribute, type = min, threshold 1.9
# NOTE(review): the direction of the pvalue comparison is not stated in this file;
# presumably repeats with a p-value above the threshold are rejected - confirm in TRAL's docs.
[filter]
[[basic]]
tag = basic_filter
[[[dict]]]
[[[[pvalue]]]]
func_name = pvalue
score = phylo_gap01
threshold = 0.1
[[[[n_effective]]]]
func_name = attribute
attribute = n_effective
type = min
threshold = 1.9

# Repeat-level settings: which scores to use, whether score and p-value are
# calculated, and the names of external tools (ginsi, Castor, alfsim).
[repeat]
scoreslist = phylo_gap01, # score (the comma in the end is needed for TRAL)
calc_score = False # is the score calculated?
calc_pvalue = False # is the pvalue calculated?
precision = 10
ginsi = ginsi # integrated in MAFFT
Castor = Castor
[[castor_parameter]]
rate_distribution = constant # either constant or gamma
# NOTE(review): alfsim follows the [[castor_parameter]] header, so a nested-section
# parser reads it as part of that subsection rather than of [repeat] - confirm this
# is where TRAL looks for it.
alfsim = alfsim

# Output settings for lists of detected repeats.
[repeat_list]
# Columns to include in repeat list TSV output
# Allowed values:
# - begin: position of the tandem repeats within the sequence
# - pvalue: statistical significance of the tandem repeats
# - divergence: divergence of the tandem repeat units
# - l_effective: length of the tandem repeat units
# - n_effective: number of tandem repeat units
# - msa_original: multiple sequence alignment
# - score: score corresponding to the value of 'model'
# - repeat_region_length: total length of repeat region
output_characteristics = begin, msa_original, l_effective, n_effective, repeat_region_length, divergence, pvalue

# model for scoring repeats. Supported: entropy, parsimony, pSim, phylo, phylo_gap01, phylo_gap001
model = phylo_gap01

# Parameters used when scoring repeats.
[repeat_score]
# NOTE(review): presumably selects a substitution model by name (an lg.dat rate matrix ships alongside this config) - confirm.
evolutionary_model = lg
# Indel handling settings.
[[indel]]
indel_rate_per_site = 0.01
ignore_gaps = True
gaps = row_wise
zipf = 1.821
# Settings for the optimisation step: start range and iteration count.
[[optimisation]]
start_min = 0.5
start_max = 1.5
n_iteration = 14
# Model-specific parameters for the K80 and TN93 models.
[[K80]]
kappa = 2.59
[[TN93]]
alpha_1 = 0.3
alpha_2 = 0.4
beta = 0.7
# Settings for score calibration.
[[score_calibration]]
scoreslist=phylo_gap01, # score (the comma at the end is needed)
save_calibration = False
precision = 10

# Amino-acid alphabet: the 20 standard characters, every accepted character,
# and for each ambiguous character the standard characters it may stand for.
[AA]
standard_chars = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
all_chars = A, B, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z
[[ambiguous_chars]]
# Single-entry values (O, U) end with a comma, presumably so they are still read as lists,
# like the scoreslist entries elsewhere in this file.
B = D,N
O = K,
U = C,
Z = E,Q
X = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
# DNA alphabet: the four standard characters, every accepted character,
# and for each ambiguous character (N, X) the standard characters it may stand for.
[DNA]
standard_chars = A, C, G, T
all_chars = A, C, G, T, N, X
[[ambiguous_chars]]
N = A, C, G, T
X = A, C, G, T
91 changes: 91 additions & 0 deletions docker/.tral/data/ALF/ecm.dat

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions docker/.tral/data/ALF/ecmu.dat

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions docker/.tral/data/ALF/jtt.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
0.531678
0.557967 0.451095
0.827445 0.154899 5.549530
0.574478 1.019843 0.313311 0.105625
0.556725 3.021995 0.768834 0.521646 0.091304
1.066681 0.318483 0.578115 7.766557 0.053907 3.417706
1.740159 1.359652 0.773313 1.272434 0.546389 0.231294 1.115632
0.219970 3.210671 4.025778 1.032342 0.724998 5.684080 0.243768 0.201696
0.361684 0.239195 0.491003 0.115968 0.150559 0.078270 0.111773 0.053769 0.181788
0.310007 0.372261 0.137289 0.061486 0.164593 0.709004 0.097485 0.069492 0.540571 2.335139
0.369437 6.529255 2.529517 0.282466 0.049009 2.966732 1.731684 0.269840 0.525096 0.202562 0.146481
0.469395 0.431045 0.330720 0.190001 0.409202 0.456901 0.175084 0.130379 0.329660 4.831666 3.856906 0.624581
0.138293 0.065314 0.073481 0.032522 0.678335 0.045683 0.043829 0.050212 0.453428 0.777090 2.500294 0.024521 0.436181
1.959599 0.710489 0.121804 0.127164 0.123653 1.608126 0.191994 0.208081 1.141961 0.098580 1.060504 0.216345 0.164215 0.148483
3.887095 1.001551 5.057964 0.589268 2.155331 0.548807 0.312449 1.874296 0.743458 0.405119 0.592511 0.474478 0.285564 0.943971 2.788406
4.582565 0.650282 2.351311 0.425159 0.469823 0.523825 0.331584 0.316862 0.477355 2.553806 0.272514 0.965641 2.114728 0.138904 1.176961 4.777647
0.084329 1.257961 0.027700 0.057466 1.104181 0.172206 0.114381 0.544180 0.128193 0.134510 0.530324 0.089134 0.201334 0.537922 0.069965 0.310927 0.080556
0.139492 0.235601 0.700693 0.453952 2.114852 0.254745 0.063452 0.052500 5.848400 0.303445 0.241094 0.087904 0.189870 5.484236 0.113850 0.628608 0.201094 0.747889
2.924161 0.171995 0.164525 0.315261 0.621323 0.179771 0.465271 0.470140 0.121827 9.533943 1.761439 0.124066 3.038533 0.593478 0.211561 0.408532 1.143980 0.239697 0.165473


0.076862 0.051057 0.042546 0.051269 0.020279 0.041061 0.061820 0.074714 0.022983 0.052569 0.091111 0.059498 0.023414 0.040530 0.050532 0.068225 0.058518 0.014336 0.032303 0.066374


A R N D C Q E G H I L K M F P S T W Y V
Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val


JTT rate matrix prepared using the DCMut method*
------------------------------------------------

The first part above indicates the symmetric 'exchangeability' parameters s_ij,
where s_ij = s_ji.
The second part gives the amino acid equilibrium frequencies pi_i.
The net replacement rate from i to j is q_ij = pi_j*s_ij.

This model is usually scaled so that the mean rate of change at
equilibrium, Sum_i Sum_j!=i pi_i*q_ij, equals 1. You should check this
scaling before using the matrix above. The PAML package will perform
this scaling.

//

*Prepared by Carolin Kosiol and Nick Goldman, December 2003.

See the following paper for more details:
Kosiol, C., and Goldman, N. 2005. Different versions of the Dayhoff rate matrix.
Molecular Biology and Evolution 22:193-199.

See also http://www.ebi.ac.uk/goldman/dayhoff
39 changes: 39 additions & 0 deletions docker/.tral/data/ALF/lg.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
0.425093
0.276818 0.751878
0.395144 0.123954 5.076149
2.489084 0.534551 0.528768 0.062556
0.969894 2.807908 1.695752 0.523386 0.084808
1.038545 0.363970 0.541712 5.243870 0.003499 4.128591
2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847
0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484
0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882
0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067
0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500
1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604
0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853
1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464
4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132
2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279
0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825
0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815
2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313

0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147

A R N D C Q E G H I L K M F P S T W Y V
Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val

Symmetrical part of the rate matrix and aa frequencies,
estimated from 3905 globular protein amino acid sequences forming 182
protein families.
The first part above indicates the symmetric 'exchangeability'
parameters, where s_ij = s_ji. The s_ij above are not scaled, but the
PAML package will perform this scaling.
The second part gives the amino acid frequencies (pi_i)
estimated from the 3905 sequences. The net replacement rate from i to
j is Q_ij = s_ij*pi_j.


Citation:

Le, S. Q., and O. Gascuel. 2008. An improved general amino acid replacement matrix. Mol. Biol. Evol. 25:1307-1320.
40 changes: 40 additions & 0 deletions docker/.tral/data/ALF/lg_FHuman.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
0.425093
0.276818 0.751878
0.395144 0.123954 5.076149
2.489084 0.534551 0.528768 0.062556
0.969894 2.807908 1.695752 0.523386 0.084808
1.038545 0.363970 0.541712 5.243870 0.003499 4.128591
2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847
0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484
0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882
0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067
0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500
1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604
0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853
1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464
4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132
2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279
0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825
0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815
2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313

0.069161169904116707 0.056884674049556723 0.035833476434349185 0.0479407202638895 0.022133798888638229 0.048263034257822017 0.072011405431553377 0.065918753416186013 0.02601579274034517 0.043181461066983624 0.098454003966423453 0.057435220134256809 0.021731264148762204 0.035697362115156471 0.06389942972842233 0.083537350487497455 0.053669110046364731 0.012227168092626202 0.026166076866009188 0.059838727961040609

//
A R N D C Q E G H I L K M F P S T W Y V
Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val

Symmetrical part of the rate matrix and aa frequencies,
estimated from 3905 globular protein amino acid sequences forming 182
protein families.
The first part above indicates the symmetric 'exchangeability'
parameters, where s_ij = s_ji. The s_ij above are not scaled, but the
PAML package will perform this scaling.
The second part gives the amino acid frequencies (pi_i)
estimated from the 3905 sequences. The net replacement rate from i to
j is Q_ij = s_ij*pi_j.


Citation:

Le, S. Q., and O. Gascuel. 2008. An improved general amino acid replacement matrix. Mol. Biol. Evol. 25:1307-1320.
Loading

0 comments on commit 48b82ec

Please sign in to comment.