Skip to content

Commit

Permalink
createsubdb and view can now return results from identifiers in .lookup with --id-mode 1
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Apr 1, 2021
1 parent 6622c9f commit 14a3dce
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 4 deletions.
4 changes: 4 additions & 0 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ Parameters::Parameters():
PARAM_TAX_OUTPUT_MODE(PARAM_TAX_OUTPUT_MODE_ID, "--tax-output-mode", "Taxonomy output mode", "0: output LCA, 1: output alignment 2: output both", typeid(int), (void *) &taxonomyOutputMode, "^[0-2]{1}$"),
// createsubdb, filtertaxseqdb
PARAM_SUBDB_MODE(PARAM_SUBDB_MODE_ID, "--subdb-mode", "Subdb mode", "Subdb mode 0: copy data 1: soft link data and write index", typeid(int), (void *) &subDbMode, "^[0-1]{1}$"),
PARAM_ID_MODE(PARAM_ID_MODE_ID, "--id-mode", "Database ID mode", "Select DB entries based on 0: database keys, 1: FASTA identifiers (.lookup)", typeid(int), (void *) &dbIdMode, "^[0-1]{1}$"),
PARAM_TAR_INCLUDE(PARAM_TAR_INCLUDE_ID, "--tar-include", "Tar Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &tarInclude, "^.*$"),
PARAM_TAR_EXCLUDE(PARAM_TAR_EXCLUDE_ID, "--tar-exclude", "Tar Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &tarExclude, "^.*$"),
// unpackdb
Expand Down Expand Up @@ -1030,6 +1031,7 @@ Parameters::Parameters():
// createsubdb
createsubdb.push_back(&PARAM_SUBDB_MODE);
createsubdb.push_back(&PARAM_ID_MODE);
createsubdb.push_back(&PARAM_V);
// renamedbkeys
Expand Down Expand Up @@ -1060,6 +1062,7 @@ Parameters::Parameters():
// view
view.push_back(&PARAM_ID_LIST);
view.push_back(&PARAM_ID_MODE);
view.push_back(&PARAM_IDX_ENTRY_TYPE);
view.push_back(&PARAM_V);
Expand Down Expand Up @@ -2344,6 +2347,7 @@ void Parameters::setDefaults() {

// createsubdb
subDbMode = Parameters::SUBDB_MODE_HARD;
dbIdMode = Parameters::ID_MODE_KEYS;

// tar2db
tarInclude = ".*";
Expand Down
5 changes: 5 additions & 0 deletions src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ class Parameters {
// Values accepted by --subdb-mode (stored in subDbMode):
// 0 copies the data, 1 soft links the data and writes only the index.
static const int SUBDB_MODE_HARD = 0;
static const int SUBDB_MODE_SOFT = 1;

// Values accepted by --id-mode (stored in dbIdMode):
// 0 selects DB entries by database keys, 1 selects them by FASTA identifiers (.lookup).
static const int ID_MODE_KEYS = 0;
static const int ID_MODE_LOOKUP = 1;

// unpackdb
static const int UNPACK_NAME_KEY = 0;
static const int UNPACK_NAME_ACCESSION = 1;
Expand Down Expand Up @@ -635,6 +638,7 @@ class Parameters {

// createsubdb
// Backing storage for --subdb-mode; holds one of the SUBDB_MODE_* constants.
int subDbMode;
// Backing storage for --id-mode; holds one of the ID_MODE_* constants.
int dbIdMode;

// tar2db
std::string tarInclude;
Expand Down Expand Up @@ -959,6 +963,7 @@ class Parameters {

// createsubdb
PARAMETER(PARAM_SUBDB_MODE)
PARAMETER(PARAM_ID_MODE)

// tar2db
PARAMETER(PARAM_TAR_INCLUDE)
Expand Down
20 changes: 18 additions & 2 deletions src/util/createsubdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ int createsubdb(int argc, const char **argv, const Command& command) {
}
}

DBReader<unsigned int> reader(par.db2.c_str(), par.db2Index.c_str(), 1, DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA);
const bool lookupMode = par.dbIdMode == Parameters::ID_MODE_LOOKUP;
int dbMode = DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA;
if (lookupMode) {
dbMode |= DBReader<unsigned int>::USE_LOOKUP_REV;
}
DBReader<unsigned int> reader(par.db2.c_str(), par.db2Index.c_str(), 1, dbMode);
reader.open(DBReader<unsigned int>::NOSORT);
const bool isCompressed = reader.isCompressed();

Expand All @@ -37,7 +42,18 @@ int createsubdb(int argc, const char **argv, const Command& command) {
bool isOrdered = true;
while (getline(&line, &len, orderFile) != -1) {
Util::parseKey(line, dbKey);
const unsigned int key = Util::fast_atoi<unsigned int>(dbKey);
unsigned int key;
if (lookupMode) {
size_t lookupId = reader.getLookupIdByAccession(dbKey);
if (lookupId == SIZE_MAX) {
Debug(Debug::WARNING) << "Could not find name " << dbKey << " in lookup\n";
continue;
}
key = reader.getLookupKey(lookupId);
} else {
key = Util::fast_atoi<unsigned int>(dbKey);
}

isOrdered &= (prevKey <= key);
prevKey = key;
const size_t id = reader.getId(key);
Expand Down
21 changes: 19 additions & 2 deletions src/util/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,26 @@ int view(int argc, const char **argv, const Command& command) {
indexSrcType = IndexReader::SRC_HEADERS;
break;
}
IndexReader reader(par.db1, par.threads, indexSrcType, 0);
const bool lookupMode = par.dbIdMode == Parameters::ID_MODE_LOOKUP;
int dbMode = DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_DATA;
if (lookupMode) {
dbMode |= DBReader<unsigned int>::USE_LOOKUP_REV;
}
IndexReader reader(par.db1, par.threads, indexSrcType, false, dbMode);
for (size_t i = 0; i < ids.size(); ++i) {
const unsigned int key = Util::fast_atoi<unsigned int>(ids[i].c_str());
unsigned int key;
std::string& ref = ids[i];
if (lookupMode) {
size_t lookupId = reader.sequenceReader->getLookupIdByAccession(ref);
if (lookupId == SIZE_MAX) {
Debug(Debug::WARNING) << "Could not find " << ref << " in lookup\n";
continue;
}
key = reader.sequenceReader->getLookupKey(lookupId);
} else {
key = Util::fast_atoi<unsigned int>(ref.c_str());
}

const size_t id = reader.sequenceReader->getId(key);
if (id >= UINT_MAX) {
Debug(Debug::ERROR) << "Key " << ids[i] << " not found in database\n";
Expand Down

0 comments on commit 14a3dce

Please sign in to comment.