Skip to content

Commit

Permalink
Add filterresult for pairwise HHblits filtering to reduce redundancy …
Browse files Browse the repository at this point in the history
…in a result db #316
  • Loading branch information
milot-mirdita committed Jul 28, 2020
1 parent 3bdaf48 commit 06bd0cf
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/CommandDeclarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ extern int extractdomains(int argc, const char **argv, const Command& command);
extern int extractorfs(int argc, const char **argv, const Command& command);
extern int extractframes(int argc, const char **argv, const Command& command);
extern int filterdb(int argc, const char **argv, const Command& command);
extern int filterresult(int argc, const char **argv, const Command& command);
extern int gff2db(int argc, const char **argv, const Command& command);
extern int masksequence(int argc, const char **argv, const Command& command);
extern int indexdb(int argc, const char **argv, const Command& command);
Expand Down
9 changes: 9 additions & 0 deletions src/MMseqsBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,15 @@ std::vector<Command> baseCommands = {
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"resultDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::allDb },
{"statsDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::genericDb }}},
{"filterresult", filterresult, &par.filterresult, COMMAND_RESULT,
"Pairwise alignment result filter",
NULL,
"Milot Mirdita <[email protected]>",
"<i:queryDB> <i:targetDB> <i:resultDB> <o:resultDB>",
CITATION_MMSEQS2,{{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"resultDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::resultDb },
{"resultDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::resultDb }}},
{"offsetalignment", offsetalignment, &par.offsetalignment, COMMAND_RESULT,
"Offset alignment by ORF start position",
NULL,
Expand Down
15 changes: 15 additions & 0 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,21 @@ Parameters::Parameters():
//result2msa.push_back(&PARAM_FIRST_SEQ_REP_SEQ);
result2dnamsa.push_back(&PARAM_V);
// filterresult
filterresult.push_back(&PARAM_SUB_MAT);
filterresult.push_back(&PARAM_GAP_OPEN);
filterresult.push_back(&PARAM_GAP_EXTEND);
filterresult.push_back(&PARAM_NO_COMP_BIAS_CORR);
filterresult.push_back(&PARAM_ALLOW_DELETION);
filterresult.push_back(&PARAM_FILTER_MAX_SEQ_ID);
filterresult.push_back(&PARAM_FILTER_QID);
filterresult.push_back(&PARAM_FILTER_QSC);
filterresult.push_back(&PARAM_FILTER_COV);
filterresult.push_back(&PARAM_FILTER_NDIFF);
filterresult.push_back(&PARAM_THREADS);
filterresult.push_back(&PARAM_COMPRESSED);
filterresult.push_back(&PARAM_V);
// convertmsa
convertmsa.push_back(&PARAM_IDENTIFIER_FIELD);
convertmsa.push_back(&PARAM_COMPRESSED);
Expand Down
1 change: 1 addition & 0 deletions src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,7 @@ class Parameters {
std::vector<MMseqsParameter*> result2pp;
std::vector<MMseqsParameter*> result2msa;
std::vector<MMseqsParameter*> result2dnamsa;
std::vector<MMseqsParameter*> filterresult;
std::vector<MMseqsParameter*> convertmsa;
std::vector<MMseqsParameter*> msa2profile;
std::vector<MMseqsParameter*> createtsv;
Expand Down
23 changes: 18 additions & 5 deletions src/util/result2profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
// default for result2profile to filter MSA
par.filterMsa = 1;
par.pca = 0.0;
par.parseParameters(argc, argv, command, true, 0, 0);
par.evalProfile = (par.evalThr < par.evalProfile) ? par.evalThr : par.evalProfile;
if (returnAlnRes) {
par.PARAM_FILTER_MAX_SEQ_ID.removeCategory(MMseqsParameter::COMMAND_EXPERT);
par.PARAM_FILTER_QID.removeCategory(MMseqsParameter::COMMAND_EXPERT);
par.PARAM_FILTER_QSC.removeCategory(MMseqsParameter::COMMAND_EXPERT);
par.PARAM_FILTER_COV.removeCategory(MMseqsParameter::COMMAND_EXPERT);
par.PARAM_FILTER_NDIFF.removeCategory(MMseqsParameter::COMMAND_EXPERT);
}
par.parseParameters(argc, argv, command, false, 0, 0);
par.evalProfile = (par.evalThr < par.evalProfile || returnAlnRes) ? par.evalThr : par.evalProfile;
par.printParameters(command.cmd, argc, argv, *command.params);

DBReader<unsigned int> resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader<unsigned int>::USE_DATA | DBReader<unsigned int>::USE_INDEX);
Expand Down Expand Up @@ -112,7 +119,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
Debug(Debug::INFO) << "Query database size: " << qDbr->getSize() << " type: " << qDbr->getDbTypeName() << "\n";
Debug(Debug::INFO) << "Target database size: " << tDbr->getSize() << " type: " << Parameters::getDbTypeName(targetSeqType) << "\n";

const bool isFiltering = par.filterMsa != 0;
const bool isFiltering = par.filterMsa != 0 || returnAlnRes;
Debug::Progress progress(dbSize - dbFrom);
#pragma omp parallel num_threads(localThreads)
{
Expand Down Expand Up @@ -203,7 +210,8 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
//MultipleAlignment::print(res, &subMat);

if (returnAlnRes) {
for (size_t i = 0; i < filteredSetSize; i++) {
// do not count query
for (size_t i = 0; i < (filteredSetSize - 1); ++i) {
size_t len = Matcher::resultToBuffer(buffer, res.alignmentResults[i], true);
result.append(buffer, len);
}
Expand Down Expand Up @@ -263,7 +271,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
}
delete[] charSequence;
}
resultWriter.close(true);
resultWriter.close(returnAlnRes == false);
resultReader.close();

if (!sameDatabase) {
Expand Down Expand Up @@ -301,3 +309,8 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret
int result2profile(int argc, const char **argv, const Command &command) {
return result2profile(argc, argv, command, false);
}

int filterresult(int argc, const char **argv, const Command &command) {
return result2profile(argc, argv, command, true);
}

0 comments on commit 06bd0cf

Please sign in to comment.