Skip to content

Commit 493cefe

Browse files
Add mode to compute exact (slow) tmscore.
1 parent 75a50f7 commit 493cefe

10 files changed

+144
-14
lines changed

Diff for: src/commons/LocalParameters.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ LocalParameters::LocalParameters() :
1919
PARAM_CHAIN_NAME_MODE(PARAM_CHAIN_NAME_MODE_ID,"--chain-name-mode", "Chain name mode", "Add chain to name:\n0: auto\n1: always add\n",typeid(int), (void *) &chainNameMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_EXPERT),
2020
PARAM_WRITE_MAPPING(PARAM_WRITE_MAPPING_ID, "--write-mapping", "Write mapping file", "write _mapping file containing mapping from internal id to taxonomic identifier", typeid(int), (void *) &writeMapping, "^[0-1]{1}", MMseqsParameter::COMMAND_EXPERT),
2121
PARAM_TMALIGN_FAST(PARAM_TMALIGN_FAST_ID,"--tmalign-fast", "TMalign fast","turn on fast search in TM-align" ,typeid(int), (void *) &tmAlignFast, "^[0-1]{1}$"),
22+
PARAM_EXACT_TMSCORE(PARAM_EXACT_TMSCORE_ID,"--exact-tmscore", "Exact TMscore","turn on fast exact TMscore (slow), default is approximate" ,typeid(int), (void *) &exactTMscore, "^[0-1]{1}$"),
2223
PARAM_N_SAMPLE(PARAM_N_SAMPLE_ID, "--n-sample", "Sample size","pick N random sample" ,typeid(int), (void *) &nsample, "^[0-9]{1}[0-9]*$"),
2324
PARAM_COORD_STORE_MODE(PARAM_COORD_STORE_MODE_ID, "--coord-store-mode", "Coord store mode", "Coordinate storage mode: \n1: C-alpha as float\n2: C-alpha as difference (uint16_t)", typeid(int), (void *) &coordStoreMode, "^[1-2]{1}$",MMseqsParameter::COMMAND_EXPERT),
2425
PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD_ID, "--min-assigned-chains-ratio", "Minimum assigned chains percentage Threshold", "minimum percentage of assigned chains out of all query chains > thr [0,100] %", typeid(float), (void *) & minAssignedChainsThreshold, "^[0-9]*(\\.[0-9]+)?$"),
@@ -86,6 +87,8 @@ LocalParameters::LocalParameters() :
8687
structurecreatedb.push_back(&PARAM_THREADS);
8788
structurecreatedb.push_back(&PARAM_V);
8889

90+
convertalignments.push_back(&PARAM_EXACT_TMSCORE);
91+
8992
createindex.push_back(&PARAM_INDEX_EXCLUDE);
9093

9194
// tmalign
@@ -103,6 +106,7 @@ LocalParameters::LocalParameters() :
103106
tmalign.push_back(&PARAM_THREADS);
104107
tmalign.push_back(&PARAM_V);
105108

109+
structurerescorediagonal.push_back(&PARAM_EXACT_TMSCORE);
106110
structurerescorediagonal.push_back(&PARAM_TMSCORE_THRESHOLD);
107111
structurerescorediagonal.push_back(&PARAM_LDDT_THRESHOLD);
108112
structurerescorediagonal.push_back(&PARAM_ALIGNMENT_TYPE);
@@ -112,6 +116,7 @@ LocalParameters::LocalParameters() :
112116
structurealign.push_back(&PARAM_LDDT_THRESHOLD);
113117
structurealign.push_back(&PARAM_SORT_BY_STRUCTURE_BITS);
114118
structurealign.push_back(&PARAM_ALIGNMENT_TYPE);
119+
structurealign.push_back(&PARAM_EXACT_TMSCORE);
115120
structurealign = combineList(structurealign, align);
116121
// tmalign.push_back(&PARAM_GAP_OPEN);
117122
// tmalign.push_back(&PARAM_GAP_EXTEND);
@@ -205,6 +210,7 @@ LocalParameters::LocalParameters() :
205210
chainNameMode = 0;
206211
writeMapping = 0;
207212
tmAlignFast = 1;
213+
exactTMscore = 0;
208214
gapOpen = 10;
209215
gapExtend = 1;
210216
nsample = 5000;

Diff for: src/commons/LocalParameters.h

+2
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ class LocalParameters : public Parameters {
104104
PARAMETER(PARAM_CHAIN_NAME_MODE)
105105
PARAMETER(PARAM_WRITE_MAPPING)
106106
PARAMETER(PARAM_TMALIGN_FAST)
107+
PARAMETER(PARAM_EXACT_TMSCORE)
107108
PARAMETER(PARAM_N_SAMPLE)
108109
PARAMETER(PARAM_COORD_STORE_MODE)
109110
PARAMETER(PARAM_MIN_ASSIGNED_CHAINS_THRESHOLD)
@@ -125,6 +126,7 @@ class LocalParameters : public Parameters {
125126
int chainNameMode;
126127
bool writeMapping;
127128
int tmAlignFast;
129+
int exactTMscore;
128130
int nsample;
129131
int coordStoreMode;
130132
float minAssignedChainsThreshold;

Diff for: src/commons/TMaligner.cpp

+116-5
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@
55
#include "TMaligner.h"
66
#include "tmalign/Coordinates.h"
77
#include <tmalign/TMalign.h>
8+
#include <tmalign/basic_fun.h>
89
#include "StructureSmithWaterman.h"
910
#include "StructureSmithWaterman.h"
1011

11-
TMaligner::TMaligner(unsigned int maxSeqLen, bool tmAlignFast, bool tmScoreOnly)
12+
TMaligner::TMaligner(unsigned int maxSeqLen, bool tmAlignFast, bool tmScoreOnly, bool computeExactScore)
1213
: tmAlignFast(tmAlignFast),
1314
xtm(maxSeqLen), ytm(maxSeqLen), xt(maxSeqLen),
14-
r1(maxSeqLen), r2(maxSeqLen){
15+
r1(maxSeqLen), r2(maxSeqLen), computeExactScore(computeExactScore){
1516
affineNW = NULL;
1617
if(tmScoreOnly == false){
1718
affineNW = new AffineNeedlemanWunsch(maxSeqLen, 20);
@@ -44,9 +45,10 @@ TMaligner::~TMaligner(){
4445
delete [] invmap;
4546
}
4647

47-
TMaligner::TMscoreResult TMaligner::computeTMscore(float *x, float *y, float *z, unsigned int targetLen,
48-
int qStartPos, int dbStartPos, const std::string &backtrace,
49-
int normalizationLen) {
48+
49+
TMaligner::TMscoreResult TMaligner::computeAppoximateTMscore(float *x, float *y, float *z, unsigned int targetLen,
50+
int qStartPos, int dbStartPos, const std::string &backtrace,
51+
int normalizationLen) {
5052
int qPos = qStartPos;
5153
int tPos = dbStartPos;
5254
std::string cigarString = backtrace;
@@ -100,6 +102,115 @@ TMaligner::TMscoreResult TMaligner::computeTMscore(float *x, float *y, float *z,
100102
return TMaligner::TMscoreResult(u, t, TM, rmsd0);
101103
}
102104

105+
106+
TMaligner::TMscoreResult TMaligner::computeExactTMscore(float *x, float *y, float *z, unsigned int targetLen,
107+
int qStartPos, int dbStartPos, const std::string &backtrace,
108+
int normalizationLen) {
109+
int qPos = qStartPos;
110+
int tPos = dbStartPos;
111+
std::string cigarString = backtrace;
112+
std::fill(invmap, invmap+queryLen, -1);
113+
for (size_t btPos = 0; btPos < cigarString.size(); btPos++) {
114+
if (cigarString[btPos] == 'M') {
115+
invmap[qPos] = tPos;
116+
qPos++;
117+
tPos++;
118+
}
119+
else if (cigarString[btPos] == 'I') {
120+
qPos++;
121+
}
122+
else {
123+
tPos++;
124+
}
125+
}
126+
127+
memcpy(target_x, x, sizeof(float) * targetLen);
128+
memcpy(target_y, y, sizeof(float) * targetLen);
129+
memcpy(target_z, z, sizeof(float) * targetLen);
130+
Coordinates targetCaCords;
131+
targetCaCords.x = target_x;
132+
targetCaCords.y = target_y;
133+
targetCaCords.z = target_z;
134+
Coordinates queryCaCords;
135+
queryCaCords.x = query_x;
136+
queryCaCords.y = query_y;
137+
queryCaCords.z = query_z;
138+
float t[3], u[3][3];
139+
float D0_MIN;
140+
141+
float rmsd0 = 0.0;
142+
float Lnorm; //normalization length
143+
float score_d8,d0,d0_search,dcu0;//for TMscore search
144+
parameter_set4search(normalizationLen, normalizationLen, D0_MIN, Lnorm,
145+
score_d8, d0, d0_search, dcu0);
146+
float local_d0_search = d0_search;
147+
148+
int simplify_step=1;
149+
if (tmAlignFast) {
150+
simplify_step=40;
151+
}
152+
detailed_search_standard(r1, r2, xtm, ytm, xt, targetCaCords, queryCaCords, queryLen,
153+
invmap, t, u, simplify_step, local_d0_search, true, Lnorm, score_d8, d0, mem);
154+
BasicFunction::do_rotation(targetCaCords, xt, targetLen, t, u);
155+
int k = 0;
156+
for(unsigned int j=0; j<queryLen; j++)
157+
{
158+
int i=invmap[j];
159+
if(i>=0)//aligned
160+
{
161+
float d = sqrt(BasicFunction::dist(xt.x[i], xt.y[i], xt.z[i], queryCaCords.x[j], queryCaCords.y[j],
162+
queryCaCords.z[j]));
163+
164+
if (i >= 0 || d <= score_d8) {
165+
r1.x[k] = targetCaCords.x[i];
166+
r1.y[k] = targetCaCords.y[i];
167+
r1.z[k] = targetCaCords.z[i];
168+
169+
r2.x[k] = queryCaCords.x[j];
170+
r2.y[k] = queryCaCords.y[j];
171+
r2.z[k] = queryCaCords.z[j];
172+
173+
xtm.x[k] = targetCaCords.x[i];
174+
xtm.y[k] = targetCaCords.y[i];
175+
xtm.z[k] = targetCaCords.z[i];
176+
177+
ytm.x[k] = queryCaCords.x[j];
178+
ytm.y[k] = queryCaCords.y[j];
179+
ytm.z[k] = queryCaCords.z[j];
180+
181+
k++;
182+
}
183+
}
184+
}
185+
int n_ali8=k;
186+
187+
KabschFast(r1, r2, n_ali8, &rmsd0, t, u, mem);// rmsd0 is used for final output, only recalculate rmsd0, not t & u
188+
rmsd0 = sqrt(rmsd0 / n_ali8);
189+
190+
simplify_step=1;
191+
float Lnorm_0=normalizationLen;
192+
//normalized by length of structure A
193+
parameter_set4final(Lnorm_0, D0_MIN, Lnorm,
194+
d0, d0_search);
195+
local_d0_search = d0_search;
196+
197+
double TM = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step,
198+
&rmsd0, local_d0_search, Lnorm, score_d8, d0, mem);
199+
200+
return TMaligner::TMscoreResult(u, t, TM, rmsd0);
201+
}
202+
203+
TMaligner::TMscoreResult TMaligner::computeTMscore(float *x, float *y, float *z, unsigned int targetLen,
204+
int qStartPos, int dbStartPos, const std::string &backtrace,
205+
int normalizationLen) {
206+
if(computeExactScore){
207+
return computeExactTMscore(x, y, z, targetLen, qStartPos, dbStartPos, backtrace, normalizationLen);
208+
} else {
209+
return computeAppoximateTMscore(x, y, z, targetLen, qStartPos, dbStartPos, backtrace, normalizationLen);
210+
}
211+
}
212+
213+
103214
void TMaligner::initQuery(float *x, float *y, float *z, char * querySeq, unsigned int queryLen){
104215
memset(querySecStruc, 0, sizeof(char) * queryLen);
105216
memcpy(query_x, x, sizeof(float) * queryLen);

Diff for: src/commons/TMaligner.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
class TMaligner{
1414
public:
15-
TMaligner(unsigned int maxSeqLen, bool tmAlignFast, bool tmScoreOnly);
15+
TMaligner(unsigned int maxSeqLen, bool tmAlignFast, bool tmScoreOnly, bool exact);
1616
~TMaligner();
1717

1818
struct TMscoreResult{
@@ -39,6 +39,7 @@ class TMaligner{
3939
unsigned int targetLen, int qStartPos,
4040
int targetStartPos, const std::string & backtrace,
4141
int normalizationLen);
42+
4243
Matcher::result_t align(unsigned int dbKey, float *target_x, float *target_y, float *target_z,
4344
char * targetSeq, unsigned int targetLen, float &TM);
4445

@@ -59,7 +60,17 @@ class TMaligner{
5960
std::string seqM, seqxA, seqyA;// for output alignment
6061
bool tmAlignFast;
6162
Coordinates xtm, ytm, xt, r1, r2;
63+
bool computeExactScore;
6264
int * invmap;
65+
66+
TMscoreResult computeExactTMscore(float *x, float *y, float *z,
67+
unsigned int targetLen, int qStartPos,
68+
int targetStartPos, const std::string & backtrace,
69+
int normalizationLen);
70+
TMscoreResult computeAppoximateTMscore(float *x, float *y, float *z,
71+
unsigned int targetLen, int qStartPos,
72+
int targetStartPos, const std::string & backtrace,
73+
int normalizationLen);
6374
};
6475

6576
#endif //FOLDSEEK_TMALIGNER_H

Diff for: src/strucclustutils/aln2tmscore.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#endif
1616

1717
int aln2tmscore(int argc, const char **argv, const Command& command) {
18-
Parameters& par = Parameters::getInstance();
18+
LocalParameters &par = LocalParameters::getLocalInstance();
1919
par.parseParameters(argc, argv, command, true, 0, 0);
2020

2121
// never allow deletions
@@ -64,7 +64,7 @@ int aln2tmscore(int argc, const char **argv, const Command& command) {
6464
std::string resultsStr;
6565
resultsStr.reserve(10 * 1024);
6666

67-
TMaligner tmaln(std::max(qdbr.getMaxSeqLen() + 1,tdbr->getMaxSeqLen() + 1), false, true);
67+
TMaligner tmaln(std::max(qdbr.getMaxSeqLen() + 1,tdbr->getMaxSeqLen() + 1), false, true, par.exactTMscore);
6868
Coordinate16 qcoords;
6969
Coordinate16 tcoords;
7070

Diff for: src/strucclustutils/scorecomplex.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ class ComplexScorer {
472472
q3diDbr = qDbr3Di;
473473
t3diDbr = tDbr3Di;
474474
maxResLen = maxChainLen * 2;
475-
tmAligner = new TMaligner(maxResLen, false, true);
475+
tmAligner = new TMaligner(maxResLen, false, true, false);
476476
}
477477

478478
void getSearchResults(unsigned int qComplexId, std::vector<unsigned int> &qChainKeys, chainKeyToComplexId_t &dbChainKeyToComplexIdLookup, complexIdToChainKeys_t &dbComplexIdToChainKeysLookup, std::vector<SearchResult> &searchResults) {
@@ -568,7 +568,7 @@ class ComplexScorer {
568568
if (maxResLen < maxChainLen * std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size())) {
569569
delete tmAligner;
570570
maxResLen = std::max(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()) * maxChainLen;
571-
tmAligner = new TMaligner(maxResLen, false, true);
571+
tmAligner = new TMaligner(maxResLen, false, true, false);
572572
}
573573

574574
finalClusters.clear();

Diff for: src/strucclustutils/structurealign.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ int structurealign(int argc, const char **argv, const Command& command) {
294294
TMaligner *tmaligner = NULL;
295295
if(needTMaligner) {
296296
tmaligner = new TMaligner(
297-
std::max(q3DiDbr->sequenceReader->getMaxSeqLen() + 1, t3DiDbr.sequenceReader->getMaxSeqLen() + 1), false, true);
297+
std::max(q3DiDbr->sequenceReader->getMaxSeqLen() + 1, t3DiDbr.sequenceReader->getMaxSeqLen() + 1), false, true, par.exactTMscore);
298298
}
299299
LDDTCalculator *lddtcalculator = NULL;
300300
if(needLDDT) {

Diff for: src/strucclustutils/structureconvertalis.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ R"html(<!DOCTYPE html>
487487
TMaligner *tmaligner = NULL;
488488
if(needTMaligner) {
489489
tmaligner = new TMaligner(
490-
std::max(tDbr->sequenceReader->getMaxSeqLen() + 1, qDbr.sequenceReader->getMaxSeqLen() + 1), false, true);
490+
std::max(tDbr->sequenceReader->getMaxSeqLen() + 1, qDbr.sequenceReader->getMaxSeqLen() + 1), false, true, par.exactTMscore);
491491
}
492492
LDDTCalculator *lddtcalculator = NULL;
493493
if(needLDDT) {

Diff for: src/strucclustutils/structurerescorediagonal.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ int structureungappedalign(int argc, const char **argv, const Command& command)
262262
TMaligner *tmaligner = NULL;
263263
if(needTMaligner) {
264264
tmaligner = new TMaligner(
265-
std::max(qdbr3Di.sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1), false, true);
265+
std::max(qdbr3Di.sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1), false, true, par.exactTMscore);
266266
}
267267
LDDTCalculator *lddtcalculator = NULL;
268268
if(needLDDT) {

Diff for: src/strucclustutils/tmalign.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ int tmalign(int argc, const char **argv, const Command& command) {
8484
#ifdef OPENMP
8585
thread_idx = static_cast<unsigned int>(omp_get_thread_num());
8686
#endif
87-
TMaligner tmaln(std::max(qdbr.sequenceReader->getMaxSeqLen() + 1,tdbr->sequenceReader->getMaxSeqLen() + 1), par.tmAlignFast, false);
87+
TMaligner tmaln(std::max(qdbr.sequenceReader->getMaxSeqLen() + 1,tdbr->sequenceReader->getMaxSeqLen() + 1), par.tmAlignFast, false, false);
8888
std::vector<Matcher::result_t> swResults;
8989
swResults.reserve(300);
9090
std::string backtrace;

0 commit comments

Comments
 (0)