tsv2exprofiledb: make compression of the generated databases optional

What the hunks below do:

  * data/workflow/tsv2exprofiledb.sh
    - The main, `_seq` and `_aln` databases only take the
      tsv2db -> compress -> rmdb route through a `*_tmp` database when the
      COMPRESSED variable is non-empty. Otherwise tsv2db writes straight to
      the final database name.
    - Every tsv2db call that writes a final database directly (`_h`, main,
      `_seq`, `_aln`) is run with MMSEQS_FORCE_MERGE=1, so the four calls
      stay consistent with each other.

  * src/MMseqsBase.cpp
    - The tsv2exprofiledb command entry uses par.verbandcompression instead
      of par.onlyverbosity as its parameter list.

  * src/util/tsv2exprofiledb.cpp
    - par.compressed is set to true before the command line is parsed.
    - COMPRESSED is handed to the script as "TRUE" when par.compressed is
      set and as NULL otherwise (presumably leaving the variable unset in
      the script -- confirm against CommandCaller::addVariable).

NOTE(review): the src/MMseqsBase.cpp hunk header declares 7 lines on each
side but only 6 are present here; one context line appears to be missing.
Confirm against the repository before applying this patch.

diff --git a/data/workflow/tsv2exprofiledb.sh b/data/workflow/tsv2exprofiledb.sh
index e2b8237f..184315d7 100644
--- a/data/workflow/tsv2exprofiledb.sh
+++ b/data/workflow/tsv2exprofiledb.sh
@@ -17,25 +17,37 @@ OUT="$2"
 [ -d "${OUT}.tsv" ] && echo "${OUT} is a directory!" && exit 1;
 
 if notExists "${OUT}_h.dbtype"; then
-    "$MMSEQS" tsv2db "${IN}_h.tsv" "${OUT}_h" --output-dbtype 12 ${VERBOSITY}
+    MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}_h.tsv" "${OUT}_h" --output-dbtype 12 ${VERBOSITY}
 fi
 
 if notExists "${OUT}.dbtype"; then
-    "$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
-    MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
-    "$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
+    if [ -n "${COMPRESSED}" ]; then
+        "$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
+        MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
+        "$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
+    else
+        MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}.tsv" "${OUT}" --output-dbtype 0 ${VERBOSITY}
+    fi
 fi
 
 if notExists "${OUT}_seq.dbtype"; then
-    "$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
-    MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
-    "$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
+    if [ -n "${COMPRESSED}" ]; then
+        "$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
+        MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
+        "$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
+    else
+        MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq" --output-dbtype 0 ${VERBOSITY}
+    fi
 fi
 
 if notExists "${OUT}_aln.dbtype"; then
-    "$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
-    MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
-    "$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
+    if [ -n "${COMPRESSED}" ]; then
+        "$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
+        MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
+        "$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
+    else
+        MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln" --output-dbtype 5 ${VERBOSITY}
+    fi
 fi
 
 if notExists "${OUT}_seq_h.dbtype"; then
diff --git a/src/MMseqsBase.cpp b/src/MMseqsBase.cpp
index 720aeacc..8325f0d4 100644
--- a/src/MMseqsBase.cpp
+++ b/src/MMseqsBase.cpp
@@ -1150,7 +1150,7 @@ std::vector baseCommands = {
                 " ",
                 CITATION_MMSEQS2,{{"",DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, NULL}}},
-        {"tsv2exprofiledb", tsv2exprofiledb, &par.onlyverbosity, COMMAND_PROFILE_PROFILE,
+        {"tsv2exprofiledb", tsv2exprofiledb, &par.verbandcompression, COMMAND_PROFILE_PROFILE,
                 "Create a expandable profile db from TSV files",
                 NULL,
                 "Milot Mirdita ",
diff --git a/src/util/tsv2exprofiledb.cpp b/src/util/tsv2exprofiledb.cpp
index 96a60291..f1c21739 100644
--- a/src/util/tsv2exprofiledb.cpp
+++ b/src/util/tsv2exprofiledb.cpp
@@ -6,14 +6,20 @@
 
 #include "tsv2exprofiledb.sh.h"
 
+void setTsv2ExProfileDbDefaults(Parameters *p) {
+    p->compressed = true;
+}
+
 int tsv2exprofiledb(int argc, const char **argv, const Command &command) {
     Parameters &par = Parameters::getInstance();
+    setTsv2ExProfileDbDefaults(&par);
     par.parseParameters(argc, argv, command, true, 0, 0);
 
     std::string program = par.db2 + ".sh";
     FileUtil::writeFile(program, tsv2exprofiledb_sh, tsv2exprofiledb_sh_len);
 
     CommandCaller cmd;
+    cmd.addVariable("COMPRESSED", par.compressed ? "TRUE" : NULL);
     cmd.addVariable("VERBOSITY", par.createParameterString(par.onlyverbosity).c_str());
     cmd.execProgram(FileUtil::getRealPathFromSymLink(program).c_str(), par.filenames);
 