diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4e1ef42..b120129 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -9,9 +6,10 @@ on: jobs: deploy: - + name: upload release to PyPI runs-on: ubuntu-latest - + permissions: + id-token: write steps: - uses: actions/checkout@v2 - name: Set up Python @@ -23,9 +21,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel - twine upload dist/* + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml deleted file mode 100644 index 364eb68..0000000 --- a/.github/workflows/run-codecov.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Run codecov - -on: - push: - branches: [dev] - pull_request: - branches: [master] - -jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.11] - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install test dependencies - run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi - - - name: Install package - run: python -m pip install . 
- - - name: Run pytest tests - run: pytest tests --cov=./ --cov-report=xml - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - name: py-${{ matrix.python-version }}-${{ matrix.os }} \ No newline at end of file diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 6184da9..489f0f6 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.12"] os: [ubuntu-latest] steps: diff --git a/docs/README.md b/docs/README.md index 1976829..3d00313 100644 --- a/docs/README.md +++ b/docs/README.md @@ -54,6 +54,10 @@ geofetch -i GSE95654 --just-metadata geofetch -i GSE95654 --processed --just-metadata ``` + +**Note:** We ensure that GEOfetch is compatible with Unix, Linux, and Mac OS X. +However, due to dependencies, some features of GEOfetch may not be available on Windows. 
+ ### Check out what exactly argument you want to use to download data: ![](./img/arguments_outputs.svg) diff --git a/docs/changelog.md b/docs/changelog.md index 2846978..e7af384 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,8 @@ # Changelog +## [0.12.6] -- 2024-02-05 +- Updated support for Windows in Prefetch (Note: Some functionality may still be unavailable on Windows) + ## [0.12.5] -- 2023-11-29 - Fixed bug, where description was not populated in PEP diff --git a/docs_jupyter/python-usage.ipynb b/docs_jupyter/python-usage.ipynb index 9b34736..39e6111 100644 --- a/docs_jupyter/python-usage.ipynb +++ b/docs_jupyter/python-usage.ipynb @@ -138,7 +138,13 @@ } ], "source": [ - "geof = Geofetcher(processed=True, data_source=\"all\", const_limit_project = 20, const_limit_discard = 500, attr_limit_truncate = 10000 )" + "geof = Geofetcher(\n", + " processed=True,\n", + " data_source=\"all\",\n", + " const_limit_project=20,\n", + " const_limit_discard=500,\n", + " attr_limit_truncate=10000,\n", + ")" ] }, { @@ -418,7 +424,7 @@ } ], "source": [ - "len(projects['GSE95654_samples'].samples)" + "len(projects[\"GSE95654_samples\"].samples)" ] }, { @@ -684,7 +690,7 @@ } ], "source": [ - "projects['GSE95654_samples'].sample_table.iloc[:15 , :5]" + "projects[\"GSE95654_samples\"].sample_table.iloc[:15, :5]" ] } ], diff --git a/geofetch/__init__.py b/geofetch/__init__.py index 8e208d1..fcd9139 100644 --- a/geofetch/__init__.py +++ b/geofetch/__init__.py @@ -1,4 +1,5 @@ """ Package-level data """ + import logmuse import coloredlogs diff --git a/geofetch/_version.py b/geofetch/_version.py index 8e377d6..8e2394f 100644 --- a/geofetch/_version.py +++ b/geofetch/_version.py @@ -1 +1 @@ -__version__ = "0.12.5" +__version__ = "0.12.6" diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 932fd74..bdbd6a1 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -11,7 +11,7 @@ from rich.progress import track import re import logmuse -from ubiquerg import 
expandpath, is_command_callable +from ubiquerg import expandpath from typing import List, Union, Dict, Tuple, NoReturn import peppy import pandas as pd @@ -59,6 +59,7 @@ _filter_gsm, _unify_list_keys, gse_content_to_dict, + is_prefetch_callable, ) _LOGGER = logging.getLogger(__name__) @@ -371,10 +372,10 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje # check to make sure prefetch is callable if not self.just_metadata and not self.processed: - if not is_command_callable("prefetch"): + if not is_prefetch_callable(): raise SystemExit( - "To download raw data You must first install the sratoolkit, with prefetch in your PATH." - " Installation instruction: http://geofetch.databio.org/en/latest/install/" + "To download raw data, you must first install the sratoolkit, with prefetch in your PATH. " + "Installation instruction: http://geofetch.databio.org/en/latest/install/" ) acc_GSE_list = parse_accessions( @@ -546,9 +547,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje name=self.project_name, meta_processed_samples=processed_metadata_samples, meta_processed_series=processed_metadata_series, - gse_meta_dict=file_gse_content_dict - if len(acc_GSE_list.keys()) == 1 - else None, + gse_meta_dict=( + file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None + ), ) if self.just_object: return return_value @@ -559,9 +560,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje f"{self.project_name}_PEP", metadata_dict_combined, subannotation_dict_combined, - gse_meta_dict=file_gse_content_dict - if len(acc_GSE_list.keys()) == 1 - else None, + gse_meta_dict=( + file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None + ), ) if self.just_object: return return_value @@ -1036,7 +1037,7 @@ def _write_processed_annotation( ) if not just_object: - with open(file_annotation_path, "w") as m_file: + with open(file_annotation_path, "w", encoding="utf-8") as m_file: 
dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys()) dict_writer.writeheader() dict_writer.writerows(processed_metadata) @@ -1789,15 +1790,22 @@ def _download_processed_file(self, file_url: str, data_folder: str) -> bool: return True except IOError as e: - _LOGGER.error(str(e)) - # The server times out if we are hitting it too frequently, - # so we should sleep a bit to reduce frequency - sleeptime = (ntry + 1) ** 3 - _LOGGER.info(f"Sleeping for {sleeptime} seconds") - time.sleep(sleeptime) - ntry += 1 - if ntry > 4: - raise e + if os.name == "nt": + _LOGGER.error(f"{e}") + raise OSError( + "Windows may not have wget command. " + "Check if `wget` command is installed correctly." + ) + else: + _LOGGER.error(str(e)) + # The server times out if we are hitting it too frequently, + # so we should sleep a bit to reduce frequency + sleeptime = (ntry + 1) ** 3 + _LOGGER.info(f"Sleeping for {sleeptime} seconds") + time.sleep(sleeptime) + ntry += 1 + if ntry > 4: + raise e def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): """ @@ -1865,12 +1873,13 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): else: # open existing annotation _LOGGER.info("Found SRA metadata, opening..") - with open(file_sra, "r") as m_file: + with open(file_sra, "r", encoding="UTF-8") as m_file: reader = csv.reader(m_file) file_list = [] srp_list = [] for k in reader: - file_list.append(k) + if k: + file_list.append(k) for value_list in file_list[1:]: srp_list.append(dict(zip(file_list[0], value_list))) diff --git a/geofetch/utils.py b/geofetch/utils.py index 850a77e..dcab44f 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -275,7 +275,7 @@ def fetch_metadata( os.makedirs(dirpath) # save file: - with open(outpath, "w") as f: + with open(outpath, "w", encoding="utf-8") as f: f.write(result_text) return result_list @@ -757,3 +757,22 @@ def gse_content_to_dict(gse_content: List[str]) -> Dict[str, dict]: gse_dict[new_key] = 
new_value return {"experiment_metadata": gse_dict} + + +def is_prefetch_callable() -> bool: + """ + Test if the prefetch command can be run. + + :return: True if it is available. + """ + try: + # Option -V means display version and then quit. + subprocess.run( + ["prefetch", "-V"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return True + except (subprocess.SubprocessError, OSError): + return False diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 854b024..8cc987b 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,10 +1,9 @@ -attmap>=0.1.8 colorama>=0.3.9 logmuse>=0.2.6 ubiquerg>=0.6.2 requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.5.3 -peppy>=0.35.3 +peppy>=0.40.0 rich>=12.5.1 coloredlogs>=15.0.1