diff --git a/src/commons/Parameters.cpp b/src/commons/Parameters.cpp index 0534e8d1c..ba797ce64 100644 --- a/src/commons/Parameters.cpp +++ b/src/commons/Parameters.cpp @@ -272,6 +272,7 @@ Parameters::Parameters(): PARAM_TAX_OUTPUT_MODE(PARAM_TAX_OUTPUT_MODE_ID, "--tax-output-mode", "Taxonomy output mode", "0: output LCA, 1: output alignment 2: output both", typeid(int), (void *) &taxonomyOutputMode, "^[0-2]{1}$"), // createsubdb, filtertaxseqdb PARAM_SUBDB_MODE(PARAM_SUBDB_MODE_ID, "--subdb-mode", "Subdb mode", "Subdb mode 0: copy data 1: soft link data and write index", typeid(int), (void *) &subDbMode, "^[0-1]{1}$"), + PARAM_ID_MODE(PARAM_ID_MODE_ID, "--id-mode", "Database ID mode", "Select DB entries based on 0: database keys, 1: FASTA identifiers (.lookup)", typeid(int), (void *) &dbIdMode, "^[0-1]{1}$"), PARAM_TAR_INCLUDE(PARAM_TAR_INCLUDE_ID, "--tar-include", "Tar Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &tarInclude, "^.*$"), PARAM_TAR_EXCLUDE(PARAM_TAR_EXCLUDE_ID, "--tar-exclude", "Tar Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &tarExclude, "^.*$"), // unpackdb @@ -1030,6 +1031,7 @@ Parameters::Parameters(): // createsubdb createsubdb.push_back(&PARAM_SUBDB_MODE); + createsubdb.push_back(&PARAM_ID_MODE); createsubdb.push_back(&PARAM_V); // renamedbkeys @@ -1060,6 +1062,7 @@ Parameters::Parameters(): // view view.push_back(&PARAM_ID_LIST); + view.push_back(&PARAM_ID_MODE); view.push_back(&PARAM_IDX_ENTRY_TYPE); view.push_back(&PARAM_V); @@ -2344,6 +2347,7 @@ void Parameters::setDefaults() { // createsubdb subDbMode = Parameters::SUBDB_MODE_HARD; + dbIdMode = Parameters::ID_MODE_KEYS; // tar2db tarInclude = ".*"; diff --git a/src/commons/Parameters.h b/src/commons/Parameters.h index 4accb5157..7b303a0e6 100644 --- a/src/commons/Parameters.h +++ b/src/commons/Parameters.h @@ -280,6 +280,9 @@ class Parameters { static const int SUBDB_MODE_HARD = 0; static const int SUBDB_MODE_SOFT = 1; + static const int ID_MODE_KEYS = 0; + static const int ID_MODE_LOOKUP = 1; + // unpackdb static const int UNPACK_NAME_KEY = 0; static const int UNPACK_NAME_ACCESSION = 1; @@ -635,6 +638,7 @@ class Parameters { // createsubdb int subDbMode; + int dbIdMode; // tar2db std::string tarInclude; @@ -959,6 +963,7 @@ class Parameters { // createsubdb PARAMETER(PARAM_SUBDB_MODE) + PARAMETER(PARAM_ID_MODE) // tar2db PARAMETER(PARAM_TAR_INCLUDE) diff --git a/src/util/createsubdb.cpp b/src/util/createsubdb.cpp index b54c65b59..e15469545 100644 --- a/src/util/createsubdb.cpp +++ b/src/util/createsubdb.cpp @@ -23,7 +23,12 @@ int createsubdb(int argc, const char **argv, const Command& command) { } } - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), 1, DBReader::USE_INDEX|DBReader::USE_DATA); + const bool lookupMode = par.dbIdMode == Parameters::ID_MODE_LOOKUP; + int dbMode = DBReader::USE_INDEX|DBReader::USE_DATA; + if (lookupMode) { + dbMode |= DBReader::USE_LOOKUP_REV; + } + DBReader reader(par.db2.c_str(), par.db2Index.c_str(), 1, dbMode); reader.open(DBReader::NOSORT); const bool isCompressed = reader.isCompressed(); @@ -37,7 +42,18 @@ int createsubdb(int argc, const char **argv, const Command& command) { bool isOrdered = true; while (getline(&line, &len, orderFile) != -1) { Util::parseKey(line, dbKey); - const unsigned int key = Util::fast_atoi(dbKey); + unsigned int key; + if (lookupMode) { + size_t lookupId = reader.getLookupIdByAccession(dbKey); + if (lookupId == SIZE_MAX) { + Debug(Debug::WARNING) << "Could not find name " << dbKey << " in lookup\n"; + continue; + } + key = reader.getLookupKey(lookupId); + } else { + key = Util::fast_atoi(dbKey); + } + isOrdered &= (prevKey <= key); prevKey = key; const size_t id = reader.getId(key); diff --git a/src/util/view.cpp b/src/util/view.cpp index e4cfbc7bf..59938b2f7 100644 --- a/src/util/view.cpp +++ b/src/util/view.cpp @@ -22,9 +22,26 @@ int view(int argc, const char **argv, const Command& command) { indexSrcType = IndexReader::SRC_HEADERS; break; } - IndexReader reader(par.db1, par.threads, indexSrcType, 0); + const bool lookupMode = par.dbIdMode == Parameters::ID_MODE_LOOKUP; + int dbMode = DBReader::USE_INDEX|DBReader::USE_DATA; + if (lookupMode) { + dbMode |= DBReader::USE_LOOKUP_REV; + } + IndexReader reader(par.db1, par.threads, indexSrcType, false, dbMode); for (size_t i = 0; i < ids.size(); ++i) { - const unsigned int key = Util::fast_atoi(ids[i].c_str()); + unsigned int key; + std::string& ref = ids[i]; + if (lookupMode) { + size_t lookupId = reader.sequenceReader->getLookupIdByAccession(ref); + if (lookupId == SIZE_MAX) { + Debug(Debug::WARNING) << "Could not find " << ref << " in lookup\n"; + continue; + } + key = reader.sequenceReader->getLookupKey(lookupId); + } else { + key = Util::fast_atoi(ref.c_str()); + } + const size_t id = reader.sequenceReader->getId(key); if (id >= UINT_MAX) { Debug(Debug::ERROR) << "Key " << ids[i] << " not found in database\n";