Skip to content

Commit

Permalink
Get foldseek path from parameter, defaults to same place as spacedust binary
Browse files: browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Feb 4, 2025
1 parent f09db40 commit f2dacc8
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Spacedust.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@
" wget -q wget https://mmseqs.com/foldseek/foldseek-linux-avx2.tar.gz\n",
" tar -xzf foldseek-linux-avx2.tar.gz\n",
" rm -f foldseek-linux-avx2.tar.gz\n",
" mv foldseek/bin/foldseek spacedust/bin\n",
" rm -rf foldseek\n",
" touch FOLDSEEK_READY\n",
"fi\n",
"\n",
Expand Down
1 change: 0 additions & 1 deletion data/clusterdb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ notExists() {

IN="$1"
TMP_PATH="$2"
FOLDSEEK="$(pwd)"/foldseek/bin/foldseek

[ ! -f "${IN}.dbtype" ] && echo "${IN}.dbtype not found!" && exit 1;
if [ -n "${USE_FOLDSEEK}" ]; then
Expand Down
1 change: 0 additions & 1 deletion data/clustersearch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ QUERY="$1"
TARGET="$2"
OUTPUT="$3"
TMP_PATH="$4"
FOLDSEEK="$(pwd)"/foldseek/bin/foldseek

if [ -n "${USE_FOLDSEEK}" ]; then
[ -n "${USE_PROFILE}" ] && [ ! -f "${TARGET}_foldseek_clu_seq.dbtype" ] && echo "${TARGET}_foldseek_clu_seq.dbtype not found! Please make sure the ${TARGET}_foldseek is clustered with clusterdb ${TARGET}_foldseek tmp --search-mode 1" && exit 1;
Expand Down
2 changes: 2 additions & 0 deletions examples/Spacedust.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@
" wget -q wget https://mmseqs.com/foldseek/foldseek-linux-avx2.tar.gz\n",
" tar -xzf foldseek-linux-avx2.tar.gz\n",
" rm -f foldseek-linux-avx2.tar.gz\n",
" mv foldseek/bin/foldseek spacedust/bin\n",
" rm -rf foldseek\n",
" touch FOLDSEEK_READY\n",
"fi\n",
"\n",
Expand Down
8 changes: 7 additions & 1 deletion src/commons/LocalParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class LocalParameters : public Parameters {
PARAMETER(PARAM_FILE_INCLUDE)
PARAMETER(PARAM_FILE_EXCLUDE)
PARAMETER(PARAM_GFF_DIR)
PARAMETER(PARAM_FOLDSEEK_PATH)

int clusterSearchMode;
float pMHThr;
Expand All @@ -53,6 +54,7 @@ class LocalParameters : public Parameters {
int suboptHitsFactor;
std::string fileInclude;
std::string fileExclude;
std::string foldseekPath;

private:
LocalParameters() :
Expand All @@ -68,7 +70,8 @@ class LocalParameters : public Parameters {
PARAM_PROFILE_CLUSTER_SEARCH(PARAM_PROFILE_CLUSTER_SEARCH_ID, "--profile-cluster-search", "Cluster search against profiles", "Perform profile(target)-sequence searches in clustersearch", typeid(bool), (void *) &profileClusterSearch, ""),
PARAM_FILE_INCLUDE(PARAM_FILE_INCLUDE_ID, "--file-include", "File Inclusion Regex", "Include file names based on this regex", typeid(std::string), (void *) &fileInclude, "^.*$"),
PARAM_FILE_EXCLUDE(PARAM_FILE_EXCLUDE_ID, "--file-exclude", "File Exclusion Regex", "Exclude file names based on this regex", typeid(std::string), (void *) &fileExclude, "^.*$"),
PARAM_GFF_DIR(PARAM_GFF_DIR_ID, "--gff-dir", "gff dir file", "Path to gff dir file", typeid(std::string), (void *) &gffDir, "")
PARAM_GFF_DIR(PARAM_GFF_DIR_ID, "--gff-dir", "gff dir file", "Path to gff dir file", typeid(std::string), (void *) &gffDir, ""),
PARAM_FOLDSEEK_PATH(PARAM_FOLDSEEK_PATH_ID, "--foldseek-path", "Path to Foldseek", "Path to Foldseek binary", typeid(std::string), (void *) &foldseekPath, "")
{

// clusterhits
Expand Down Expand Up @@ -132,6 +135,7 @@ class LocalParameters : public Parameters {
clustersearchworkflow = combineList(clustersearchworkflow, clusterhits);
clustersearchworkflow.push_back(&PARAM_PROFILE_CLUSTER_SEARCH);
clustersearchworkflow.push_back(&PARAM_CLUSTERSEARCH_MODE);
clustersearchworkflow.push_back(&PARAM_FOLDSEEK_PATH);

//aa2foldseek
aa2foldseek = combineList(prefilter, align);
Expand All @@ -141,6 +145,7 @@ class LocalParameters : public Parameters {
//clusterdb
clusterdb = combineList(clusterworkflow, profile2seq);
clusterdb.push_back(&PARAM_CLUSTERSEARCH_MODE);
clusterdb.push_back(&PARAM_FOLDSEEK_PATH);

clusterSearchMode = 0;
suboptHitsFactor = 0;
Expand All @@ -154,6 +159,7 @@ class LocalParameters : public Parameters {
fileInclude = ".*";
fileExclude = "^$";
gffDir = "";
foldseekPath = "foldseek";

//TODO: add citations (foldseek & mmseqs & clustersearch)
citations.emplace(CITATION_SPACEDUST, "");
Expand Down
19 changes: 18 additions & 1 deletion src/workflow/clusterdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ void setclusterDbDefaults(LocalParameters *p) {
int clusterdb(int argc, const char **argv, const Command &command) {
LocalParameters &par = LocalParameters::getLocalInstance();
setclusterDbDefaults(&par);
par.foldseekPath = FileUtil::dirName(*(argv - 2)) + "/foldseek";
par.parseParameters(argc, argv, command, true, 0, 0);

if (FileUtil::directoryExists(par.db2.c_str()) == false) {
Expand Down Expand Up @@ -42,7 +43,23 @@ int clusterdb(int argc, const char **argv, const Command &command) {
if (par.removeTmpFiles) {
cmd.addVariable("REMOVE_TMP", "TRUE");
}
cmd.addVariable("USE_FOLDSEEK", par.clusterSearchMode == 1 ? "TRUE" : NULL);

bool useFoldseek = false;
if (par.clusterSearchMode == 1) {
useFoldseek = true;
struct stat st;
if (stat(par.foldseekPath.c_str(), &st) != 0) {
Debug(Debug::ERROR) << "Cannot find foldseek binary " << par.foldseekPath << ".\n";
EXIT(EXIT_FAILURE);
}
bool isExecutable = (st.st_mode & S_IXUSR) || (st.st_mode & S_IXGRP) || (st.st_mode & S_IXOTH);
if (isExecutable == false) {
Debug(Debug::ERROR) << "Cannot execute foldseek binary " << par.foldseekPath << ".\n";
EXIT(EXIT_FAILURE);
}
}
cmd.addVariable("FOLDSEEK", par.foldseekPath.c_str());
cmd.addVariable("USE_FOLDSEEK", useFoldseek ? "TRUE" : NULL);
cmd.addVariable("CLUSTER_PAR", par.createParameterString(par.clusterworkflow).c_str());
cmd.addVariable("CONSENSUS_PAR", par.createParameterString(par.profile2seq).c_str());
cmd.addVariable("THREADS_PAR", par.createParameterString(par.onlythreads).c_str());
Expand Down
19 changes: 18 additions & 1 deletion src/workflow/clustersearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ void setClusterSearchWorkflowDefaults(LocalParameters *p) {
int clustersearch(int argc, const char **argv, const Command &command) {
LocalParameters &par = LocalParameters::getLocalInstance();
setClusterSearchWorkflowDefaults(&par);
par.foldseekPath = FileUtil::dirName(*(argv - 2)) + "/foldseek";

par.PARAM_MAX_REJECTED.addCategory(MMseqsParameter::COMMAND_EXPERT);
par.PARAM_DB_OUTPUT.addCategory(MMseqsParameter::COMMAND_EXPERT);
Expand Down Expand Up @@ -86,8 +87,24 @@ int clustersearch(int argc, const char **argv, const Command &command) {
if (par.removeTmpFiles) {
cmd.addVariable("REMOVE_TMP", "TRUE");
}

bool useFoldseek = false;
if (par.clusterSearchMode == 1) {
useFoldseek = true;
struct stat st;
if (stat(par.foldseekPath.c_str(), &st) != 0) {
Debug(Debug::ERROR) << "Cannot find foldseek binary " << par.foldseekPath << ".\n";
EXIT(EXIT_FAILURE);
}
bool isExecutable = (st.st_mode & S_IXUSR) || (st.st_mode & S_IXGRP) || (st.st_mode & S_IXOTH);
if (isExecutable == false) {
Debug(Debug::ERROR) << "Cannot execute foldseek binary " << par.foldseekPath << ".\n";
EXIT(EXIT_FAILURE);
}
}
cmd.addVariable("USE_PROFILE", par.profileClusterSearch == 1 ? "TRUE" : NULL);
cmd.addVariable("USE_FOLDSEEK", par.clusterSearchMode == 1 ? "TRUE" : NULL);
cmd.addVariable("FOLDSEEK", par.foldseekPath.c_str());
cmd.addVariable("USE_FOLDSEEK", useFoldseek ? "TRUE" : NULL);
cmd.addVariable("CLUSTER_PAR", par.createParameterString(par.clusterworkflow).c_str());
if(par.numIterations <= 1){
std::vector<MMseqsParameter*> searchwithoutnumiter;
Expand Down

0 comments on commit f2dacc8

Please sign in to comment.