From fea8d2037312f66aa20b0976cda1ebeba2ce354f Mon Sep 17 00:00:00 2001 From: Martin Steinegger Date: Mon, 19 Sep 2022 18:36:09 +0900 Subject: [PATCH] Add support for external k-mer thresholds for the prefilter --- src/mmseqs.cpp | 4 +++- src/prefiltering/Prefiltering.cpp | 7 +++++++ src/prefiltering/Prefiltering.h | 11 +++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/mmseqs.cpp b/src/mmseqs.cpp index 1abf996cd..cdbb3134e 100644 --- a/src/mmseqs.cpp +++ b/src/mmseqs.cpp @@ -1,5 +1,6 @@ #include "Command.h" #include "DownloadDatabase.h" +#include "Prefiltering.h" const char* binary_name = "mmseqs"; const char* tool_name = "MMseqs2"; @@ -13,5 +14,6 @@ bool hide_base_commands = false; void (*validatorUpdate)(void) = 0; std::vector<Command> commands = {}; std::vector<DatabaseDownload> externalDownloads = {}; +std::vector<KmerThreshold> externalThreshold = {}; -bool hide_base_downloads = false; \ No newline at end of file +bool hide_base_downloads = false; diff --git a/src/prefiltering/Prefiltering.cpp b/src/prefiltering/Prefiltering.cpp index 0c562112f..dc7db0dd4 100644 --- a/src/prefiltering/Prefiltering.cpp +++ b/src/prefiltering/Prefiltering.cpp @@ -1008,6 +1008,12 @@ int Prefiltering::getKmerThreshold(const float sensitivity, const bool isProfile return kmerScore.sequence(); } float kmerThrBest = FLT_MAX; + int paramType = isProfile ? 
Parameters::DBTYPE_HMM_PROFILE : Parameters::DBTYPE_AMINO_ACIDS; + for(size_t i = 0; i < externalThreshold.size(); i++){ + if(kmerSize == externalThreshold[i].kmerSize && externalThreshold[i].sequenceType == paramType){ + return static_cast<int>(externalThreshold[i].base - (externalThreshold[i].sensPerStep * sensitivity)); + } + } if (isProfile == true) { if (hasContextPseudoCnts == true) { if (kmerSize == 5) { @@ -1131,3 +1137,4 @@ std::pair<int, int> Prefiltering::optimizeSplit(size_t totalMemoryInByte, DBRead } + diff --git a/src/prefiltering/Prefiltering.h b/src/prefiltering/Prefiltering.h index 2b15a0f5b..3eb8dd85b 100644 --- a/src/prefiltering/Prefiltering.h +++ b/src/prefiltering/Prefiltering.h @@ -15,6 +15,16 @@ class QueryMatcherTaxonomyHook; +struct KmerThreshold{ + int sequenceType; + int kmerSize; + float base; + float sensPerStep; +}; + +extern std::vector<KmerThreshold> externalThreshold; + + class Prefiltering { public: Prefiltering( @@ -130,3 +140,4 @@ class Prefiltering { }; #endif +