Skip to content

Commit

Permalink
Move more project configs to config.py and download HMM to .phyling/H…
Browse files Browse the repository at this point in the history
…MM instead of user specified path.
  • Loading branch information
chtsai0105 committed Nov 30, 2023
1 parent 759b2fe commit 4c0afa4
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 33 deletions.
16 changes: 10 additions & 6 deletions src/phyling/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
"""Some project specific configuration parameters."""
prot_aln_ext = 'aa.mfa' # protein alignment extension
cds_aln_ext = 'cds.mfa' # cds alignment extension
protein_ext = 'faa' # protein fasta files
cds_ext = 'cds' # coding sequence alignment file extension
aln_ext = 'mfa' # general alignment extension
from pathlib import Path

default_HMM = 'HMM' # default directory for HMM downloads
database = "https://busco-data.ezlab.org/v5/data"
cfg_dir = Path.home() / ".phyling"
default_HMM = "HMM" # default directory for HMM downloads

prot_aln_ext = "aa.mfa" # protein alignment extension
cds_aln_ext = "cds.mfa" # cds alignment extension
protein_ext = "faa" # protein fasta files
cds_ext = "cds" # coding sequence alignment file extension
aln_ext = "mfa" # general alignment extension
20 changes: 11 additions & 9 deletions src/phyling/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from urllib.error import HTTPError
from urllib.request import urlopen

import phyling.config


class Data_updater(ABC):
"""Store, update and retrieve BUSCO markers."""
Expand Down Expand Up @@ -92,11 +94,11 @@ class Metadata_updater(Data_updater):
dataset names for downloading BUSCO markers.
"""

def __init__(self, database_url, cfg_dir):
def __init__(self, database_url):
"""Initialize metadata object and local db file."""
super().__init__(database_url)
self._filetype = "metadata"
self._data = cfg_dir / "metadata.pickle"
self._data = phyling.config.cfg_dir / "metadata.pickle"
self._data_url = f"{database_url}/file_versions.tsv"
self._metadata_md5_url = f"{database_url}/file_versions.tsv.hash"

Expand Down Expand Up @@ -189,17 +191,17 @@ def _save_data(self, data):
# return hasher.hexdigest()


def download(database, cfg_dir, markerset, output, **kwargs) -> None:
def download(markerset, **kwargs) -> None:
"""
Help to download/update BUSCO v5 markerset to a local folder.
First it check whether the ~/.phyling/metadata.pickle is exist. A missing or outdated file will trigger the module
to update the metadata.
First it checks whether the metadata file exists under the config folder ~/.phyling. A missing or outdated file
will trigger the module to download/update the metadata.
Passing "list" to markerset argument will list all the available markersets. Passing a valid name to the markerset
argument will download the markerset to the HMM folder under the config folder (~/.phyling/HMM).
"""
metadata_updater = Metadata_updater(database_url=database, cfg_dir=cfg_dir)
metadata_updater = Metadata_updater(database_url=phyling.config.database)
markerset_dict = metadata_updater.updater()

if markerset == "list":
Expand All @@ -211,16 +213,16 @@ def download(database, cfg_dir, markerset, output, **kwargs) -> None:
# Adjust databases display according to the terminal size
width, _ = shutil.get_terminal_size((80, 24))
col = width // 40
url_list = [url_list[x: x + col] for x in range(0, len(url_list), col)]
url_list = [url_list[x : x + col] for x in range(0, len(url_list), col)]
col_width = max(len(word) for row in url_list for word in row) + 3 # padding
for row in url_list:
# Print the database list
print(" ".join(word.ljust(col_width) for word in row))

else:
hmm_markerset_updater = HMM_markerset_updater(
database_url=database,
output_dir=output,
database_url=phyling.config.database,
output_dir=Path(phyling.config.cfg_dir, phyling.config.default_HMM),
metadata=markerset_dict,
name=markerset,
)
Expand Down
3 changes: 2 additions & 1 deletion src/phyling/libphyling.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,9 +600,10 @@ def main(inputs, input_dir, output, markerset, evalue, method, non_trim, from_ch
sys.exit(1)

if not markerset.exists():
markerset = Path(phyling.config.default_HMM, markerset, "hmms")
markerset = Path(phyling.config.cfg_dir, phyling.config.default_HMM, markerset, "hmms")
else:
markerset = Path(markerset)
logging.info(f"Loading markerset from {markerset}")

if not markerset.exists():
logging.error(f"Markerset folder does not exist {markerset} - did you download BUSCO?")
Expand Down
19 changes: 2 additions & 17 deletions src/phyling/phyling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import sys
import textwrap
from pathlib import Path
from types import SimpleNamespace

try:
from importlib.metadata import version
Expand All @@ -34,13 +33,6 @@ def parser_submodule(parser, parent_parser) -> None:
description=textwrap.dedent(download.__doc__),
)
p_download.add_argument("markerset", metavar='HMM markerset or "list"', help="Name of the HMM markerset")
p_download.add_argument(
"-o",
"--output",
type=Path,
default=f"./{phyling.config.default_HMM}",
help=f'Output directory to save HMM markerset (default="./{phyling.config.default_HMM}")',
)
p_download.set_defaults(func=download)

p_aln = subparsers.add_parser(
Expand Down Expand Up @@ -166,16 +158,9 @@ def main():
"""
logging.basicConfig(format=f"%(asctime)s {main.__name__} %(levelname)s %(message)s", level="INFO", force=True)
logger = logging.getLogger()
# Create namespace object conf
args = dict()

args["script_path"] = Path(__file__).resolve().parent
args["database"] = "https://busco-data.ezlab.org/v5/data"
args["cfg_dir"] = Path.home() / ".phyling"
args = SimpleNamespace(**args)

# Create config folder in $HOME/.phyling
args.cfg_dir.mkdir(exist_ok=True)
phyling.config.cfg_dir.mkdir(exist_ok=True)

# Implement shared arguments between sub-menu, reference from
# https://stackoverflow.com/questions/33645859/how-to-add-common-arguments-to-argparse-subcommands
Expand All @@ -200,7 +185,7 @@ def main():
parser.print_help(sys.stderr)
sys.exit(1)

parser.parse_args(namespace=args)
args = parser.parse_args()

if args.verbose:
logger.setLevel("DEBUG")
Expand Down

0 comments on commit 4c0afa4

Please sign in to comment.