From a506d677f2ea4d12eaef113ea031fe01a4ecd52e Mon Sep 17 00:00:00 2001 From: Milot Mirdita Date: Wed, 29 Dec 2021 17:50:51 +0100 Subject: [PATCH] Allow subprojects to build their own precomputed indices --- src/MMseqsBase.cpp | 2 + src/commons/IndexReader.h | 53 +++++++++++++------- src/mmseqs.cpp | 2 + src/prefiltering/PrefilteringIndexReader.cpp | 3 +- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/MMseqsBase.cpp b/src/MMseqsBase.cpp index 9732199ab..53276c46e 100644 --- a/src/MMseqsBase.cpp +++ b/src/MMseqsBase.cpp @@ -3,6 +3,8 @@ #include "CommandDeclarations.h" #include "DownloadDatabase.h" +const char* MMSEQS_CURRENT_INDEX_VERSION = "16"; + Parameters& par = Parameters::getInstance(); std::vector baseCommands = { {"easy-search", easysearch, &par.easysearchworkflow, COMMAND_EASY, diff --git a/src/commons/IndexReader.h b/src/commons/IndexReader.h index 48307e851..18b7e7567 100644 --- a/src/commons/IndexReader.h +++ b/src/commons/IndexReader.h @@ -8,12 +8,18 @@ class IndexReader { public: - const static int PRELOAD_NO = 0; - const static int PRELOAD_DATA = 1; - const static int PRELOAD_INDEX = 2; + const static unsigned int PRELOAD_NO = 0; + const static unsigned int PRELOAD_DATA = 1; + const static unsigned int PRELOAD_INDEX = 2; - IndexReader(const std::string &dataName, int threads, int databaseType = SEQUENCES | HEADERS, int preloadMode = false, int dataMode=(DBReader::USE_INDEX | DBReader::USE_DATA)) - : sequenceReader(NULL), index(NULL) { + IndexReader( + const std::string &dataName, + int threads, + unsigned int databaseType = SEQUENCES | HEADERS, + unsigned int preloadMode = false, + int dataMode = DBReader::USE_INDEX | DBReader::USE_DATA, + std::string failSuffix = "" + ) : sequenceReader(NULL), index(NULL) { int targetDbtype = FileUtil::parseDbType(dataName.c_str()); if (Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) { index = new DBReader(dataName.c_str(), (dataName + ".index").c_str(), 1, DBReader::USE_DATA|DBReader::USE_INDEX); @@ -24,13 +30,19 @@ class IndexReader { seqType = data.seqType; bool touchIndex = preloadMode & PRELOAD_INDEX; bool touchData = preloadMode & PRELOAD_DATA; - if (databaseType & SRC_SEQUENCES) { + if (databaseType & USER_SELECT) { + sequenceReader = PrefilteringIndexReader::openNewReader( + index, + (databaseType & ~USER_SELECT) + 1, + databaseType & ~USER_SELECT, + dataMode & DBReader::USE_DATA, threads, touchIndex, touchData + ); + } else if (databaseType & SRC_SEQUENCES) { sequenceReader = PrefilteringIndexReader::openNewReader(index, - PrefilteringIndexReader::DBR2DATA, PrefilteringIndexReader::DBR2INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); - + PrefilteringIndexReader::DBR2DATA, PrefilteringIndexReader::DBR2INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); } else if (databaseType & SEQUENCES) { sequenceReader = PrefilteringIndexReader::openNewReader(index, - PrefilteringIndexReader::DBR1DATA, PrefilteringIndexReader::DBR1INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); + PrefilteringIndexReader::DBR1DATA, PrefilteringIndexReader::DBR1INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); } else if (databaseType & SRC_HEADERS) { sequenceReader = PrefilteringIndexReader::openNewHeaderReader(index, @@ -63,10 +75,12 @@ class IndexReader { if (sequenceReader == NULL) { if (databaseType & (HEADERS | SRC_HEADERS)) { - sequenceReader = new DBReader((dataName + "_h").c_str(), (dataName + "_h.index").c_str(), threads, dataMode); - } else { - sequenceReader = new DBReader(dataName.c_str(), (dataName + ".index").c_str(), threads, dataMode); + failSuffix = "_h"; } + sequenceReader = new DBReader( + (dataName + failSuffix).c_str(), (dataName + failSuffix + ".index").c_str(), + threads, dataMode + ); sequenceReader->open(DBReader::NOSORT); bool touchData = preloadMode & PRELOAD_DATA; if (touchData) { @@ -76,11 +90,16 @@ class IndexReader { } } - static const int SEQUENCES = 1; - static const int HEADERS = 2; - static const int SRC_HEADERS = 4; - static const int SRC_SEQUENCES = 8; - static const int ALIGNMENTS = 16; + static const unsigned int SEQUENCES = 1; + static const unsigned int HEADERS = 2; + static const unsigned int SRC_HEADERS = 4; + static const unsigned int SRC_SEQUENCES = 8; + static const unsigned int ALIGNMENTS = 16; + static const unsigned int USER_SELECT = 1 << 31; + + static unsigned int makeUserDatabaseType(unsigned int baseKey) { + return baseKey | USER_SELECT; + } int getDbtype() const { return seqType; diff --git a/src/mmseqs.cpp b/src/mmseqs.cpp index 6c36cb23c..1abf996cd 100644 --- a/src/mmseqs.cpp +++ b/src/mmseqs.cpp @@ -7,6 +7,8 @@ const char* tool_introduction = "MMseqs2 (Many against Many sequence searching) const char* main_author = "Martin Steinegger (martin.steinegger@snu.ac.kr)"; const char* show_extended_help = "1"; const char* show_bash_info = "1"; +extern const char* MMSEQS_CURRENT_INDEX_VERSION; +const char* index_version_compatible = MMSEQS_CURRENT_INDEX_VERSION; bool hide_base_commands = false; void (*validatorUpdate)(void) = 0; std::vector commands = {}; diff --git a/src/prefiltering/PrefilteringIndexReader.cpp b/src/prefiltering/PrefilteringIndexReader.cpp index 2df7e7e99..a8fac444b 100644 --- a/src/prefiltering/PrefilteringIndexReader.cpp +++ b/src/prefiltering/PrefilteringIndexReader.cpp @@ -6,7 +6,8 @@ #include "IndexBuilder.h" #include "Parameters.h" -const char* PrefilteringIndexReader::CURRENT_VERSION = "16"; +extern const char* index_version_compatible; +const char* PrefilteringIndexReader::CURRENT_VERSION = index_version_compatible; unsigned int PrefilteringIndexReader::VERSION = 0; unsigned int PrefilteringIndexReader::META = 1; unsigned int PrefilteringIndexReader::SCOREMATRIXNAME = 2;