Skip to content

Commit

Permalink
Merge pull request #61 from hsgweon/pipits_UNITE_10.01.2024
Browse files Browse the repository at this point in the history
Updated UNITE to 10.0. SINTAX is the default classifier.
  • Loading branch information
hsgweon authored Aug 18, 2024
2 parents 7b1d370 + 2435024 commit 560015d
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 57 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@

## Updates/Notice/News

###### UPDATE (18 August 2024) - PIPITS 3.1
> Significant changes.
>
> - **UNITE 10.0** has been added and is the default DB version until further notice.
> - The default classifier is now SINTAX. It is much quicker and gives results very comparable to those of RDP Classifier. This change was made because the UNITE database has grown massively.
###### UPDATE (11 August 2023) - PIPITS 3.0
> - Just a slight change in the installation instruction, namely from python=3.6 to python=3.8 to avoid "SyntaxError: invalid syntax"
Expand Down
2 changes: 1 addition & 1 deletion bin/pipits_funits
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ from pispino.seqtools import *
from pispino.runcmd import *
from pispino.logger import *

__version__ = 3.0
__version__ = 3.1

__author__ = "Hyun Soon Gweon"
__copyright__ = "Copyright 2015, The PIPITS Project"
Expand Down
129 changes: 74 additions & 55 deletions bin/pipits_process
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import sys, os, argparse, subprocess, shutil
from pispino.runcmd import *
from pispino.logger import *

__version__ = 3.0
__version__ = 3.1

__author__ = "Hyun Soon Gweon"
__copyright__ = "Copyright 2015, The PIPITS Project"
Expand Down Expand Up @@ -119,15 +119,15 @@ if __name__ == '__main__':
dest = "taxassignmentmethod",
help = "Choice of taxonomic assignment. By default, PIPITS will run both RDP Classifier and SINTAX (VSEARCH).",
choices = ["all", "rdp", "sin"],
default = "all",
default = "sin",
required = False)
parser.add_argument(
"--unite",
action = "store",
dest = "unite",
help = "UNITE db version to be used - PIPITS will download db automaticlly. Leaving this option out will default to the most recent version of UNITE available to PIPITS.",
choices = ["27.10.2022", "10.05.2021", "04.02.2020", "02.02.2019", "01.12.2017", "28.06.2017"],
default = "27.10.2022",
choices = ["04.04.2024", "25.07.2023", "27.10.2022", "10.05.2021", "04.02.2020", "02.02.2019", "01.12.2017", "28.06.2017"],
default = "04.04.2024",
required = False)
options = parser.parse_args()

Expand Down Expand Up @@ -331,69 +331,87 @@ if __name__ == '__main__':
# UNITE #
#########

logger("Downloading UNITE trained database, version: " + options.unite, logging_file, display = True)

url = "https://sourceforge.net/projects/pipits/files/PIPITS_DB/UNITE_retrained_" + options.unite + ".tar.gz"
if options.taxassignmentmethod == "all" or options.taxassignmentmethod == "rdp":

if options.unite == "02.02.2019":
md5 = "8fd3b74a510bb20b67933a2ecc620f89"
elif options.unite == "01.12.2017":
md5 = "3c5be9c60fecf70076739379e7c9ead5"
elif options.unite == "28.06.2017":
md5 = "33fa78987751a494c586676ff3a0da65"
elif options.unite == "04.02.2020":
md5 = "b2f833c89794be20a5fdb6169d9205f1"
elif options.unite == "10.05.2021":
md5 = "13f1edfb1357eeda3f41ff8b0a15447f"
elif options.unite == "27.10.2022":
md5 = "7d31f5612a78607e50d4170b75d0cbfa"

downloadDB(
url = url,
md5 = md5,
output_dir = "pipits_db",
logging_file = logging_file,
summary_file = summary_file,
verbose = options.verbose)
logger(BLUE + "... done" + ENDC, logging_file, display = True)
logger("Downloading UNITE trained database, version: " + options.unite, logging_file, display = True)

url = "https://sourceforge.net/projects/pipits/files/PIPITS_DB/UNITE_retrained_" + options.unite + ".tar.gz"

if options.unite == "02.02.2019":
md5 = "8fd3b74a510bb20b67933a2ecc620f89"
elif options.unite == "01.12.2017":
md5 = "3c5be9c60fecf70076739379e7c9ead5"
elif options.unite == "28.06.2017":
md5 = "33fa78987751a494c586676ff3a0da65"
elif options.unite == "04.02.2020":
md5 = "b2f833c89794be20a5fdb6169d9205f1"
elif options.unite == "10.05.2021":
md5 = "13f1edfb1357eeda3f41ff8b0a15447f"
elif options.unite == "27.10.2022":
md5 = "7d31f5612a78607e50d4170b75d0cbfa"
elif options.unite == "25.07.2023":
md5 = "1c578d0aba436f0b66d9a73b2991086d"
elif options.unite == "04.04.2024":
md5 = "94812f45cbfed846b55a6e845e68f35f"

downloadDB(
url = url,
md5 = md5,
output_dir = "pipits_db",
logging_file = logging_file,
summary_file = summary_file,
verbose = options.verbose)
logger(BLUE + "... done" + ENDC, logging_file, display = True)


##################
# VSEARCH SINTAX #
##################

logger("Downloading database for SINTAX", logging_file, display = True)
if options.taxassignmentmethod == "all" or options.taxassignmentmethod == "sin":

url = "https://sourceforge.net/projects/pipits/files/PIPITS_DB/UNITE_retrained_27.10.2022.sintax.fa.tar.gz"
md5 = "b26ebd07a5abb415e1ad35b8dc8108d2"

downloadDB(
url = url,
md5 = md5,
output_dir = "pipits_db",
logging_file = logging_file,
summary_file = summary_file,
verbose = options.verbose)
logger(BLUE + "... done" + ENDC, logging_file, display = True)
logger("Downloading database for SINTAX, version: " + options.unite, logging_file, display = True)

url = "https://sourceforge.net/projects/pipits/files/PIPITS_DB/UNITE_retrained_" + options.unite + ".sintax.fa.tar.gz"
md5 = "135cce3029569c1c0528a7fdd9ed6673"

if options.unite == "27.10.2022":
md5 = "b26ebd07a5abb415e1ad35b8dc8108d2"
elif options.unite == "25.07.2023":
md5 = "cf7dcd5e289a4d31c87fadb056caaca7"
elif options.unite == "04.04.2024":
md5 = "135cce3029569c1c0528a7fdd9ed6673"

downloadDB(
url = url,
md5 = md5,
output_dir = "pipits_db",
logging_file = logging_file,
summary_file = summary_file,
verbose = options.verbose)
logger(BLUE + "... done" + ENDC, logging_file, display = True)


##########
# WARCUP #
##########

logger("Downloading WARCUP trained database: ", logging_file, display = True)
if options.warcup == True:

logger("Downloading WARCUP trained database: ", logging_file, display = True)

url = "https://sourceforge.net/projects/pipits/files/PIPITS_DB/warcup_retrained_V2.tar.gz"
md5 = "e84733f23121f00de03b2a9b5398d6fb"
url = "https://sourceforge.net/projects/pipits/files/PIPITS_DB/warcup_retrained_V2.tar.gz"
md5 = "e84733f23121f00de03b2a9b5398d6fb"

downloadDB(
url = url,
md5 = md5,
output_dir = "pipits_db",
logging_file = logging_file,
summary_file = summary_file,
verbose = options.verbose)
logger(BLUE + "... done" + ENDC, logging_file, display = True)
downloadDB(
url = url,
md5 = md5,
output_dir = "pipits_db",
logging_file = logging_file,
summary_file = summary_file,
verbose = options.verbose)
logger(BLUE + "... done" + ENDC, logging_file, display = True)


##########
Expand Down Expand Up @@ -540,7 +558,7 @@ if __name__ == '__main__':
# VSINTAX against UNITE #
#########################

if (options.taxassignmentmethod == "all" or "sin"):
if options.taxassignmentmethod == "all" or options.taxassignmentmethod == "sin":

logger(ENDC + "Assigning taxonomy with VSEARCH-SINTAX [VSEARCH]" + ENDC, logging_file, display = True)
PIPITS_UNITE_RETRAINED_VSEARCHSINTAXFORMATTED = "pipits_db/UNITE_retrained_" + options.unite + ".sintax.fa/UNITE_retrained_" + options.unite + ".sintax.fa"
Expand Down Expand Up @@ -619,8 +637,8 @@ if __name__ == '__main__':
# RDP against UNITE #
#####################

if (options.taxassignmentmethod == "all" or "rdp"):
if options.taxassignmentmethod == "all" or options.taxassignmentmethod == "rdp":

logger(ENDC + "Assigning taxonomy with UNITE [RDP Classifier]" + ENDC, logging_file, display = True)
PIPITS_UNITE_RETRAINED_PROPERTIES = "pipits_db/UNITE_retrained_" + options.unite + "/UNITE_retrained/rRNAClassifier.properties"
cmd = " ".join(["classifier",
Expand Down Expand Up @@ -795,10 +813,11 @@ if __name__ == '__main__':

return totalCount, otus, sampleSize

if (options.taxassignmentmethod == "all" or "rdp"):
if options.taxassignmentmethod == "all" or options.taxassignmentmethod == "rdp":
otu_reads_count, otu_count, otu_sample_count = biomstats(options.outDir + "/otu_table_rdp.biom")
phylo_reads_count, phylo_count, phylo_sample_count = biomstats(options.outDir + "/phylotype_table_rdp.biom")
else:

if options.taxassignmentmethod == "all" or options.taxassignmentmethod == "sin":
otu_reads_count, otu_count, otu_sample_count = biomstats(options.outDir + "/otu_table_sintax.biom")
phylo_reads_count, phylo_count, phylo_sample_count = biomstats(options.outDir + "/phylotype_table_sintax.biom")

Expand Down
Binary file modified pipits/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added pipits/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file modified pipits/__pycache__/pipits_SeqIO.cpython-36.pyc
Binary file not shown.
Binary file added pipits/__pycache__/pipits_SeqIO.cpython-39.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup
import os

__version__ = os.environ.get("VERSION", "3.0")
__version__ = os.environ.get("VERSION", "3.1")

setup(
name = "pipits",
Expand Down

0 comments on commit 560015d

Please sign in to comment.