Skip to content

Commit

Permalink
Include file size and modified date of inputs in tmp file hash calculation #372
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Nov 12, 2020
1 parent cc47254 commit 45c4de7
Show file tree
Hide file tree
Showing 21 changed files with 64 additions and 25 deletions.
43 changes: 41 additions & 2 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2289,18 +2289,57 @@ std::vector<MMseqsParameter*> Parameters::combineList(const std::vector<MMseqsPa
return retVec;
}

size_t Parameters::hashParameter(const std::vector<std::string> &filenames, const std::vector<MMseqsParameter*> &par){
/**
 * Computes a hash over the command's file arguments and parameters.
 *
 * The hashed string is built from, in order:
 *   - every entry of `filenames`, each followed by a single space;
 *   - for entries whose matching `dbtypes[i]` has access mode
 *     DbType::ACCESS_MODE_INPUT (and that are not the literal "stdin"):
 *     the st_size and modification time reported by stat() for the file,
 *     or, if that stat() fails, for the file name with ".index" appended;
 *   - createParameterString(par);
 *   - version;
 *   - every entry of restArgv.
 *
 * @param dbtypes   per-position database descriptors; only consulted for
 *                  indices that exist in both `dbtypes` and `filenames`
 * @param filenames file arguments, in positional order
 * @param par       parameters passed to createParameterString
 * @return Util::hash of the assembled string
 */
size_t Parameters::hashParameter(const std::vector<DbType> &dbtypes, const std::vector<std::string> &filenames, const std::vector<MMseqsParameter*> &par){
    std::string hashString;
    hashString.reserve(1024);

    struct stat stat_buf;
    // Once a VARIADIC input has been handled, later filenames are no longer
    // matched against dbtypes, so no further size/mtime data is added.
    bool stopAfterVariadic = false;
    for (size_t i = 0; i < filenames.size(); ++i){
        hashString.append(filenames[i]);
        hashString.append(" ");
        if (stopAfterVariadic == false && i < dbtypes.size()) {
            const DbType& type = dbtypes[i];
            // Only entries declared as inputs contribute file metadata.
            if (type.accessMode != DbType::ACCESS_MODE_INPUT) {
                continue;
            }

            // The variadic entry itself is still stat'ed below.
            if (type.specialType & DbType::VARIADIC) {
                stopAfterVariadic = true;
            }

            // "stdin" is not looked up on disk.
            if (filenames[i] == "stdin") {
                continue;
            }

            if (::stat(filenames[i].c_str(), &stat_buf) == 0) {
                hashString.append(SSTR(stat_buf.st_size));
#ifdef __APPLE__
                hashString.append(SSTR(stat_buf.st_mtimespec.tv_sec));
#else
                hashString.append(SSTR(stat_buf.st_mtime));
#endif
                continue;
            }

            // Fall back to "<name>.index" when the name itself cannot be stat'ed.
            std::string index(filenames[i]);
            index.append(".index");
            if (::stat(index.c_str(), &stat_buf) == 0) {
                hashString.append(SSTR(stat_buf.st_size));
#ifdef __APPLE__
                hashString.append(SSTR(stat_buf.st_mtimespec.tv_sec));
#else
                hashString.append(SSTR(stat_buf.st_mtime));
#endif
                continue;
            }
            // Neither path could be stat'ed: only the filename is hashed.
        }
    }
    hashString.append(createParameterString(par));
    hashString.append(version);
    for (int i = 0; i < restArgc; ++i) {
        hashString.append(restArgv[i]);
    }
    return Util::hash(hashString.c_str(), hashString.size());
}


Expand Down
2 changes: 1 addition & 1 deletion src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ class Parameters {
std::vector<MMseqsParameter*> combineList(const std::vector<MMseqsParameter*> &par1,
const std::vector<MMseqsParameter*> &par2);

size_t hashParameter(const std::vector<std::string> &filenames, const std::vector<MMseqsParameter*> &par);
size_t hashParameter(const std::vector<DbType> &dbtypes, const std::vector<std::string> &filenames, const std::vector<MMseqsParameter*> &par);

std::string createParameterString(const std::vector<MMseqsParameter*> &vector, bool wasSet = false);

Expand Down
2 changes: 1 addition & 1 deletion src/multihit/MultiHitDb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ int multihitdb(int argc, const char **argv, const Command &command) {
Debug(Debug::INFO) << "Created dir " << tmpDir << "\n";
}
}
std::string hash = SSTR(par.hashParameter(par.filenames, par.multihitdb));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.multihitdb));
if(par.reuseLatest == true){
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest" );
}
Expand Down
2 changes: 1 addition & 1 deletion src/multihit/MultiHitSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int multihitsearch(int argc, const char **argv, const Command &command) {
Debug(Debug::INFO) << "Created dir " << par.db4 << "\n";
}
}
size_t hash = par.hashParameter(par.filenames, par.multihitsearch);
size_t hash = par.hashParameter(command.databases, par.filenames, par.multihitsearch);
std::string tmpDir = par.db4 + "/" + SSTR(hash);
if (FileUtil::directoryExists(tmpDir.c_str()) == false) {
if (FileUtil::makeDir(tmpDir.c_str()) == false) {
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ int clusteringworkflow(int argc, const char **argv, const Command& command) {
setClusterAutomagicParameters(par);

std::string tmpDir = par.db3;
std::string hash = SSTR(par.hashParameter(par.filenames, par.clusterworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.clusterworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/ClusterUpdate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ int clusterupdate(int argc, const char **argv, const Command& command) {
par.maxAccept = maxAccept;

std::string tmpDir = par.db6;
std::string hash = SSTR(par.hashParameter(par.filenames, par.clusterUpdate));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.clusterUpdate));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
8 changes: 4 additions & 4 deletions src/workflow/CreateIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <cassert>
#include <climits>

int createindex(Parameters &par, std::string indexerModule, std::string flag) {
int createindex(Parameters &par, const Command &command, const std::string &indexerModule, const std::string &flag) {
bool sensitivity = false;
// only set kmerScore to INT_MAX if -s was used
for (size_t i = 0; i < par.createindex.size(); i++) {
Expand All @@ -30,7 +30,7 @@ int createindex(Parameters &par, std::string indexerModule, std::string flag) {
}

std::string tmpDir = par.db2;
std::string hash = SSTR(par.hashParameter(par.filenames, par.createindex));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.createindex));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down Expand Up @@ -100,7 +100,7 @@ int createlinindex(int argc, const char **argv, const Command& command) {
<< "Please provide the parameter --search-type 2 (translated) or 3 (nucleotide)\n";
return EXIT_FAILURE;
}
return createindex(par, "kmerindexdb", (isNucl == false) ? "" : (par.searchType == Parameters::SEARCH_TYPE_TRANSLATED||
return createindex(par, command, "kmerindexdb", (isNucl == false) ? "" : (par.searchType == Parameters::SEARCH_TYPE_TRANSLATED||
par.searchType == Parameters::SEARCH_TYPE_TRANS_NUCL_ALN) ? "TRANSLATED" : "LIN_NUCL");
}

Expand Down Expand Up @@ -169,6 +169,6 @@ int createindex(int argc, const char **argv, const Command& command) {
<< "Please provide the parameter --search-type 2 (translated) or 3 (nucleotide)\n";
return EXIT_FAILURE;
}
return createindex(par, "indexdb", (isNucl == false) ? "" : (par.searchType == Parameters::SEARCH_TYPE_TRANSLATED||
return createindex(par, command, "indexdb", (isNucl == false) ? "" : (par.searchType == Parameters::SEARCH_TYPE_TRANSLATED||
par.searchType == Parameters::SEARCH_TYPE_TRANS_NUCL_ALN) ? "TRANSLATED" : "NUCL");
}
2 changes: 1 addition & 1 deletion src/workflow/Databases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ int databases(int argc, const char **argv, const Command &command) {
}
par.printParameters(command.cmd, argc, argv, par.databases);
std::string tmpDir = par.db3;
std::string hash = SSTR(par.hashParameter(par.filenames, par.databases));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.databases));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/EasyCluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ int easycluster(int argc, const char **argv, const Command &command) {
setEasyClusterMustPassAlong(&par);

std::string tmpDir = par.filenames.back();
std::string hash = SSTR(par.hashParameter(par.filenames, *command.params));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/EasyLinclust.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ int easylinclust(int argc, const char **argv, const Command &command) {
setEasyLinclustMustPassAlong(&par);

std::string tmpDir = par.filenames.back();
std::string hash = SSTR(par.hashParameter(par.filenames, *command.params));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/EasyRbh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int easyrbh(int argc, const char **argv, const Command &command) {
}

std::string tmpDir = par.filenames.back();
std::string hash = SSTR(par.hashParameter(par.filenames, *command.params));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/EasySearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ int doeasysearch(int argc, const char **argv, const Command &command, bool linse
}

std::string tmpDir = par.filenames.back();
std::string hash = SSTR(par.hashParameter(par.filenames, *command.params));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/EasyTaxonomy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ int easytaxonomy(int argc, const char **argv, const Command& command) {
setEasyTaxonomyMustPassAlong(&par);

std::string tmpDir = par.filenames.back();
std::string hash = SSTR(par.hashParameter(par.filenames, *command.params));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Enrich.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ int enrich(int argc, const char **argv, const Command &command) {
par.parseParameters(argc, argv, command, true, 0, 0);

std::string tmpDir = par.db6;
std::string hash = SSTR(par.hashParameter(par.filenames, par.enrichworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.enrichworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Linclust.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ int linclust(int argc, const char **argv, const Command& command) {
par.parseParameters(argc, argv, command, true, 0, 0);

std::string tmpDir = par.db3;
std::string hash = SSTR(par.hashParameter(par.filenames, par.linclustworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.linclustworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Linsearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ int linsearch(int argc, const char **argv, const Command &command) {
par.printParameters(command.cmd, argc, argv, par.searchworkflow);

std::string tmpDir = par.db4;
std::string hash = SSTR(par.hashParameter(par.filenames, par.linsearchworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.linsearchworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ int map(int argc, const char **argv, const Command &command) {
par.parseParameters(argc, argv, command, true, 0, 0);

std::string tmpDir = par.db4;
std::string hash = SSTR(par.hashParameter(par.filenames, par.mapworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.mapworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Rbh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ int rbh(int argc, const char **argv, const Command &command) {


std::string tmpDir = par.db4;
std::string hash = SSTR(par.hashParameter(par.filenames, par.searchworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.searchworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ int search(int argc, const char **argv, const Command& command) {
par.printParameters(command.cmd, argc, argv, par.searchworkflow);

std::string tmpDir = par.db4;
std::string hash = SSTR(par.hashParameter(par.filenames, par.searchworkflow));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.searchworkflow));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/TaxPerContig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ int taxpercontig(int argc, const char **argv, const Command& command) {
setTaxPerContigMustPassAlong(&par);

std::string tmpDir = par.db4;
std::string hash = SSTR(par.hashParameter(par.filenames, *command.params));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, *command.params));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down
2 changes: 1 addition & 1 deletion src/workflow/Taxonomy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int taxonomy(int argc, const char **argv, const Command& command) {
setTaxonomyMustPassAlong(&par);

std::string tmpDir = par.db4;
std::string hash = SSTR(par.hashParameter(par.filenames, par.taxonomy));
std::string hash = SSTR(par.hashParameter(command.databases, par.filenames, par.taxonomy));
if (par.reuseLatest) {
hash = FileUtil::getHashFromSymLink(tmpDir + "/latest");
}
Expand Down

0 comments on commit 45c4de7

Please sign in to comment.