-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ffa18a8
commit 48b82ec
Showing
31 changed files
with
1,565 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
sequence_type = AA | ||
|
||
[sequence] | ||
[[repeat_detection]] | ||
# AA includes all detectors used by default on protein sequence data. | ||
AA = HHrepID, T-REKS, TRUST, XSTREAM | ||
# DNA includes all detectors used by default on DNA sequence data. | ||
DNA = Phobos, TRED, T-REKS, TRF, XSTREAM | ||
[[repeat_detector_path]] | ||
# If the executable is in the system path, supply its name. Otherwise, supply the full path to the executable. Details are explained in TRAL's online docs. | ||
PHOBOS = phobos | ||
HHrepID = hhrepid_64 | ||
HHrepID_dummyhmm = /.tral/data/hhrepid/dummyHMM.hmm | ||
T-REKS = T-REKS | ||
TRED = tred | ||
TRF = trf | ||
TRUST = TRUST | ||
TRUST_substitutionmatrix = /tral/tral_external_software/TRUST_Align/Align/BLOSUM50 | ||
XSTREAM = XSTREAM | ||
|
||
[hmm] | ||
hmmbuild = hmmbuild | ||
l_effective_max = 50 | ||
|
||
[filter] | ||
[[basic]] | ||
tag = basic_filter | ||
[[[dict]]] | ||
[[[[pvalue]]]] | ||
func_name = pvalue | ||
score = phylo_gap01 | ||
threshold = 0.1 | ||
[[[[n_effective]]]] | ||
func_name = attribute | ||
attribute = n_effective | ||
type = min | ||
threshold = 1.9 | ||
|
||
[repeat] | ||
scoreslist = phylo_gap01, # score (the comma at the end is needed for TRAL) | ||
calc_score = False # is the score calculated? | ||
calc_pvalue = False # is the pvalue calculated? | ||
precision = 10 | ||
ginsi = ginsi # integrated in MAFFT | ||
Castor = Castor | ||
[[castor_parameter]] | ||
rate_distribution = constant # either constant or gamma | ||
alfsim = alfsim | ||
|
||
[repeat_list] | ||
# Columns to include in repeat list TSV output | ||
# Allowed values: | ||
# - begin: position of the tandem repeats within the sequence, | ||
# - pvalue: statistical significance of the tandem repeats | ||
# - divergence: divergence of the tandem repeat units | ||
# - l_effective: length of the tandem repeat units | ||
# - n_effective: number of tandem repeat units | ||
# - msa_original: multiple sequence alignment | ||
# - score: score corresponding to the value of 'model' | ||
# - repeat_region_length: total length of repeat region | ||
output_characteristics = begin, msa_original, l_effective, n_effective, repeat_region_length, divergence, pvalue | ||
|
||
# model for scoring repeats. Supported: entropy, parsimony, pSim, phylo, phylo_gap01, phylo_gap001 | ||
model = phylo_gap01 | ||
|
||
[repeat_score] | ||
evolutionary_model = lg | ||
[[indel]] | ||
indel_rate_per_site = 0.01 | ||
ignore_gaps = True | ||
gaps = row_wise | ||
zipf = 1.821 | ||
[[optimisation]] | ||
start_min = 0.5 | ||
start_max = 1.5 | ||
n_iteration = 14 | ||
[[K80]] | ||
kappa = 2.59 | ||
[[TN93]] | ||
alpha_1 = 0.3 | ||
alpha_2 = 0.4 | ||
beta = 0.7 | ||
[[score_calibration]] | ||
scoreslist=phylo_gap01, # score (the comma at the end is needed) | ||
save_calibration = False | ||
precision = 10 | ||
|
||
[AA] | ||
standard_chars = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y | ||
all_chars = A, B, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z | ||
[[ambiguous_chars]] | ||
B = D,N | ||
O = K, | ||
U = C, | ||
Z = E,Q | ||
X = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y | ||
[DNA] | ||
standard_chars = A, C, G, T | ||
all_chars = A, C, G, T, N, X | ||
[[ambiguous_chars]] | ||
N = A, C, G, T | ||
X = A, C, G, T |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
0.531678 | ||
0.557967 0.451095 | ||
0.827445 0.154899 5.549530 | ||
0.574478 1.019843 0.313311 0.105625 | ||
0.556725 3.021995 0.768834 0.521646 0.091304 | ||
1.066681 0.318483 0.578115 7.766557 0.053907 3.417706 | ||
1.740159 1.359652 0.773313 1.272434 0.546389 0.231294 1.115632 | ||
0.219970 3.210671 4.025778 1.032342 0.724998 5.684080 0.243768 0.201696 | ||
0.361684 0.239195 0.491003 0.115968 0.150559 0.078270 0.111773 0.053769 0.181788 | ||
0.310007 0.372261 0.137289 0.061486 0.164593 0.709004 0.097485 0.069492 0.540571 2.335139 | ||
0.369437 6.529255 2.529517 0.282466 0.049009 2.966732 1.731684 0.269840 0.525096 0.202562 0.146481 | ||
0.469395 0.431045 0.330720 0.190001 0.409202 0.456901 0.175084 0.130379 0.329660 4.831666 3.856906 0.624581 | ||
0.138293 0.065314 0.073481 0.032522 0.678335 0.045683 0.043829 0.050212 0.453428 0.777090 2.500294 0.024521 0.436181 | ||
1.959599 0.710489 0.121804 0.127164 0.123653 1.608126 0.191994 0.208081 1.141961 0.098580 1.060504 0.216345 0.164215 0.148483 | ||
3.887095 1.001551 5.057964 0.589268 2.155331 0.548807 0.312449 1.874296 0.743458 0.405119 0.592511 0.474478 0.285564 0.943971 2.788406 | ||
4.582565 0.650282 2.351311 0.425159 0.469823 0.523825 0.331584 0.316862 0.477355 2.553806 0.272514 0.965641 2.114728 0.138904 1.176961 4.777647 | ||
0.084329 1.257961 0.027700 0.057466 1.104181 0.172206 0.114381 0.544180 0.128193 0.134510 0.530324 0.089134 0.201334 0.537922 0.069965 0.310927 0.080556 | ||
0.139492 0.235601 0.700693 0.453952 2.114852 0.254745 0.063452 0.052500 5.848400 0.303445 0.241094 0.087904 0.189870 5.484236 0.113850 0.628608 0.201094 0.747889 | ||
2.924161 0.171995 0.164525 0.315261 0.621323 0.179771 0.465271 0.470140 0.121827 9.533943 1.761439 0.124066 3.038533 0.593478 0.211561 0.408532 1.143980 0.239697 0.165473 | ||
|
||
|
||
0.076862 0.051057 0.042546 0.051269 0.020279 0.041061 0.061820 0.074714 0.022983 0.052569 0.091111 0.059498 0.023414 0.040530 0.050532 0.068225 0.058518 0.014336 0.032303 0.066374 | ||
|
||
|
||
A R N D C Q E G H I L K M F P S T W Y V | ||
Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val | ||
|
||
|
||
JTT rate matrix prepared using the DCMut method* | ||
------------------------------------------------ | ||
|
||
The first part above indicates the symmetric 'exchangeability' parameters s_ij, | ||
where s_ij = s_ji. | ||
The second part gives the amino acid equilibrium frequencies pi_i. | ||
The net replacement rate from i to j is q_ij = pi_j*s_ij. | ||
|
||
This model is usually scaled so that the mean rate of change at | ||
equilibrium, Sum_i Sum_j!=i pi_i*q_ij, equals 1. You should check this | ||
scaling before using the matrix above. The PAML package will perform | ||
this scaling. | ||
|
||
// | ||
|
||
*Prepared by Carolin Kosiol and Nick Goldman, December 2003. | ||
|
||
See the following paper for more details: | ||
Kosiol, C., and Goldman, N. 2005. Different versions of the Dayhoff rate matrix. | ||
Molecular Biology and Evolution 22:193-199. | ||
|
||
See also http://www.ebi.ac.uk/goldman/dayhoff |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
0.425093 | ||
0.276818 0.751878 | ||
0.395144 0.123954 5.076149 | ||
2.489084 0.534551 0.528768 0.062556 | ||
0.969894 2.807908 1.695752 0.523386 0.084808 | ||
1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 | ||
2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 | ||
0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 | ||
0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 | ||
0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 | ||
0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 | ||
1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 | ||
0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 | ||
1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 | ||
4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 | ||
2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 | ||
0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 | ||
0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 | ||
2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 | ||
|
||
0.079066 0.055941 0.041977 0.053052 0.012937 0.040767 0.071586 0.057337 0.022355 0.062157 0.099081 0.064600 0.022951 0.042302 0.044040 0.061197 0.053287 0.012066 0.034155 0.069147 | ||
|
||
A R N D C Q E G H I L K M F P S T W Y V | ||
Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val | ||
|
||
Symmetrical part of the rate matrix and aa frequencies, | ||
estimated from 3905 globular protein amino acid sequences forming 182 | ||
protein families. | ||
The first part above indicates the symmetric 'exchangeability' | ||
parameters, where s_ij = s_ji. The s_ij above are not scaled, but the | ||
PAML package will perform this scaling. | ||
The second part gives the amino acid frequencies (pi_i) | ||
estimated from the 3905 sequences. The net replacement rate from i to | ||
j is Q_ij = s_ij*pi_j. | ||
|
||
|
||
Citation: | ||
|
||
Le, S. Q., and O. Gascuel. 2008. An improved general amino acid replacement matrix. Mol. Biol. Evol. 25:1307-1320. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
0.425093 | ||
0.276818 0.751878 | ||
0.395144 0.123954 5.076149 | ||
2.489084 0.534551 0.528768 0.062556 | ||
0.969894 2.807908 1.695752 0.523386 0.084808 | ||
1.038545 0.363970 0.541712 5.243870 0.003499 4.128591 | ||
2.066040 0.390192 1.437645 0.844926 0.569265 0.267959 0.348847 | ||
0.358858 2.426601 4.509238 0.927114 0.640543 4.813505 0.423881 0.311484 | ||
0.149830 0.126991 0.191503 0.010690 0.320627 0.072854 0.044265 0.008705 0.108882 | ||
0.395337 0.301848 0.068427 0.015076 0.594007 0.582457 0.069673 0.044261 0.366317 4.145067 | ||
0.536518 6.326067 2.145078 0.282959 0.013266 3.234294 1.807177 0.296636 0.697264 0.159069 0.137500 | ||
1.124035 0.484133 0.371004 0.025548 0.893680 1.672569 0.173735 0.139538 0.442472 4.273607 6.312358 0.656604 | ||
0.253701 0.052722 0.089525 0.017416 1.105251 0.035855 0.018811 0.089586 0.682139 1.112727 2.592692 0.023918 1.798853 | ||
1.177651 0.332533 0.161787 0.394456 0.075382 0.624294 0.419409 0.196961 0.508851 0.078281 0.249060 0.390322 0.099849 0.094464 | ||
4.727182 0.858151 4.008358 1.240275 2.784478 1.223828 0.611973 1.739990 0.990012 0.064105 0.182287 0.748683 0.346960 0.361819 1.338132 | ||
2.139501 0.578987 2.000679 0.425860 1.143480 1.080136 0.604545 0.129836 0.584262 1.033739 0.302936 1.136863 2.020366 0.165001 0.571468 6.472279 | ||
0.180717 0.593607 0.045376 0.029890 0.670128 0.236199 0.077852 0.268491 0.597054 0.111660 0.619632 0.049906 0.696175 2.457121 0.095131 0.248862 0.140825 | ||
0.218959 0.314440 0.612025 0.135107 1.165532 0.257336 0.120037 0.054679 5.306834 0.232523 0.299648 0.131932 0.481306 7.803902 0.089613 0.400547 0.245841 3.151815 | ||
2.547870 0.170887 0.083688 0.037967 1.959291 0.210332 0.245034 0.076701 0.119013 10.649107 1.702745 0.185202 1.898718 0.654683 0.296501 0.098369 2.188158 0.189510 0.249313 | ||
|
||
0.069161169904116707 0.056884674049556723 0.035833476434349185 0.0479407202638895 0.022133798888638229 0.048263034257822017 0.072011405431553377 0.065918753416186013 0.02601579274034517 0.043181461066983624 0.098454003966423453 0.057435220134256809 0.021731264148762204 0.035697362115156471 0.06389942972842233 0.083537350487497455 0.053669110046364731 0.012227168092626202 0.026166076866009188 0.059838727961040609 | ||
|
||
// | ||
A R N D C Q E G H I L K M F P S T W Y V | ||
Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val | ||
|
||
Symmetrical part of the rate matrix and aa frequencies, | ||
estimated from 3905 globular protein amino acid sequences forming 182 | ||
protein families. | ||
The first part above indicates the symmetric 'exchangeability' | ||
parameters, where s_ij = s_ji. The s_ij above are not scaled, but the | ||
PAML package will perform this scaling. | ||
The second part gives the amino acid frequencies (pi_i) | ||
estimated from the 3905 sequences. The net replacement rate from i to | ||
j is Q_ij = s_ij*pi_j. | ||
|
||
|
||
Citation: | ||
|
||
Le, S. Q., and O. Gascuel. 2008. An improved general amino acid replacement matrix. Mol. Biol. Evol. 25:1307-1320. |
Oops, something went wrong.