diff --git a/.conda/meta.yaml b/.conda/meta.yaml deleted file mode 100644 index 6702ced..0000000 --- a/.conda/meta.yaml +++ /dev/null @@ -1,50 +0,0 @@ -{% set data = load_setup_py_data() %} - -package: - name: pytask-parallel - version: {{ data.get('version') }} - -source: - # git_url is nice in that it won't capture devenv stuff. However, it only captures - # committed code, so pay attention. - git_url: ../ - -build: - noarch: python - number: 0 - script: {{ PYTHON }} setup.py install --single-version-externally-managed --record record.txt - -requirements: - host: - - python - - pip - - setuptools - - run: - - python >=3.6 - - cloudpickle - - pytask >=0.0.11 - - loky - -test: - requires: - - pytest - - loky - source_files: - - tox.ini - - tests - commands: - - pytask --version - - pytask --help - - pytask clean - - pytask markers - - pytask collect - - - pytest tests - -about: - home: https://github.com/pytask-dev/pytask-parallel - license: MIT - license_file: LICENSE - summary: Parallelize the execution of tasks. - dev_url: https://github.com/pytask-dev/pytask-parallel diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 03c4ed6..8662f0b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,11 +1,9 @@ ---- +______________________________________________________________________ -name: Bug Report -about: Create a bug report to help us improve pytask-parallel -title: "BUG:" -labels: "bug" +name: Bug Report about: Create a bug report to help us improve pytask-parallel title: +"BUG:" labels: "bug" ---- +______________________________________________________________________ - [ ] I have checked that this issue has not already been reported. @@ -14,11 +12,11 @@ labels: "bug" - [ ] (optional) I have confirmed this bug exists on the `main` branch of pytask-parallel. ---- +______________________________________________________________________ -**Note**: Please read [this -guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) detailing -how to provide the necessary information for us to reproduce your bug. +**Note**: Please read +[this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) +detailing how to provide the necessary information for us to reproduce your bug. #### Code Sample, a copy-pastable example diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index 6217526..786691e 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -1,11 +1,9 @@ ---- +______________________________________________________________________ -name: Documentation Improvement -about: Report wrong or missing documentation -title: "DOC:" -labels: "documentation" +name: Documentation Improvement about: Report wrong or missing documentation title: +"DOC:" labels: "documentation" ---- +______________________________________________________________________ #### Location of the documentation diff --git a/.github/ISSUE_TEMPLATE/enhancement.md b/.github/ISSUE_TEMPLATE/enhancement.md index 278ece3..6d8e00e 100644 --- a/.github/ISSUE_TEMPLATE/enhancement.md +++ b/.github/ISSUE_TEMPLATE/enhancement.md @@ -1,16 +1,14 @@ ---- +______________________________________________________________________ -name: Enhancement -about: Suggest an idea for pytask-parallel -title: "ENH:" -labels: "enhancement" +name: Enhancement about: Suggest an idea for pytask-parallel title: "ENH:" labels: +"enhancement" ---- +______________________________________________________________________ #### Is your feature request related to a problem? Provide a description of what the problem is, e.g. "I wish I could use pytask-parallel -to do [...]". +to do \[...\]". #### Describe the solution you'd like diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index b5d2878..d93e1c3 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -1,17 +1,15 @@ ---- +______________________________________________________________________ -name: Submit Question -about: Ask a general question about pytask-parallel -title: "QST:" +name: Submit Question about: Ask a general question about pytask-parallel title: "QST:" labels: "question" ---- +______________________________________________________________________ #### Question about pytask-parallel -**Note**: If you'd still like to submit a question, please read [this guide]( -https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) detailing how to -provide the necessary information for us to reproduce your question. +**Note**: If you'd still like to submit a question, please read +[this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) +detailing how to provide the necessary information for us to reproduce your question. ```python # Your code here, if applicable diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 1817a2f..b627f37 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -6,4 +6,4 @@ Provide a description and/or bullet points to describe the changes in this PR. - [ ] Reference issues which can be closed due to this PR with "Closes #x". - [ ] Review whether the documentation needs to be updated. -- [ ] Document PR in docs/changes.rst. +- [ ] Document PR in CHANGES.md. diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/main.yml similarity index 85% rename from .github/workflows/continuous-integration-workflow.yml rename to .github/workflows/main.yml index 2c9b8b8..3d6fb0a 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,4 @@ -name: Continuous Integration Workflow +name: main # Automatically cancel a previous run. concurrency: @@ -41,7 +41,7 @@ jobs: - name: Run unit tests and doctests. shell: bash -l {0} - run: tox -e pytest -- -m "unit or (not integration and not end_to_end)" --cov=./ --cov-report=xml -n auto + run: tox -e pytest -- tests -m "unit or (not integration and not end_to_end)" --cov=./ --cov-report=xml -n auto - name: Upload coverage report for unit tests and doctests. if: runner.os == 'Linux' && matrix.python-version == '3.9' @@ -50,7 +50,7 @@ jobs: - name: Run integration tests. shell: bash -l {0} - run: tox -e pytest -- -m integration --cov=./ --cov-report=xml -n auto + run: tox -e pytest -- tests -m integration --cov=./ --cov-report=xml -n auto - name: Upload coverage reports of integration tests. if: runner.os == 'Linux' && matrix.python-version == '3.9' @@ -59,7 +59,7 @@ jobs: - name: Run end-to-end tests. shell: bash -l {0} - run: tox -e pytest -- -m end_to_end --cov=./ --cov-report=xml -n auto + run: tox -e pytest -- tests -m end_to_end --cov=./ --cov-report=xml -n auto - name: Upload coverage reports of end-to-end tests. if: runner.os == 'Linux' && matrix.python-version == '3.9' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 685bb85..4c09c56 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,6 @@ repos: args: ['--maxkb=100'] - id: check-merge-conflict - id: check-yaml - exclude: meta.yaml - id: debug-statements - id: end-of-file-fixer - repo: https://github.com/pre-commit/pygrep-hooks @@ -17,9 +16,6 @@ repos: - id: python-no-eval - id: python-no-log-warn - id: python-use-type-annotations - - id: rst-backticks - - id: rst-directive-colons - - id: rst-inline-touching-normal - id: text-unicode-replacement-char - repo: https://github.com/asottile/pyupgrade rev: v2.32.0 @@ -39,11 +35,6 @@ repos: rev: 22.3.0 hooks: - id: black -- repo: https://github.com/asottile/blacken-docs - rev: v1.12.1 - hooks: - - id: blacken-docs - additional_dependencies: [black] - repo: https://github.com/PyCQA/flake8 rev: 4.0.1 hooks: @@ -65,24 +56,30 @@ repos: pydocstyle, Pygments, ] -- repo: https://github.com/PyCQA/doc8 - rev: 0.11.1 - hooks: - - id: doc8 - repo: https://github.com/econchick/interrogate rev: 1.5.0 hooks: - id: interrogate args: [-v, --fail-under=40, src, tests] +- repo: https://github.com/executablebooks/mdformat + rev: 0.7.14 + hooks: + - id: mdformat + additional_dependencies: [ + mdformat-gfm, + mdformat-black, + ] + args: [--wrap, "88"] - repo: https://github.com/codespell-project/codespell rev: v2.1.0 hooks: - id: codespell - args: [-L unparseable] - repo: https://github.com/mgedmin/check-manifest rev: "0.48" hooks: - id: check-manifest + args: [--no-build-isolation] + additional_dependencies: [setuptools-scm, toml] - repo: meta hooks: - id: check-hooks-apply diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..1b4f293 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,62 @@ +# Changes + +This is a record of all past pytask-parallel releases and what went into them in reverse +chronological order. Releases follow [semantic versioning](https://semver.org/) and all +releases are available on [PyPI](https://pypi.org/project/pytask-parallel) and +[Anaconda.org](https://anaconda.org/conda-forge/pytask-parallel). + +## 0.1.2 - 2022-xx-xx + +- {pull}`36` adds a test for . + +## 0.1.1 - 2022-02-08 + +- {pull}`30` removes unnecessary content from `tox.ini`. +- {pull}`33` skips concurrent CI builds. +- {pull}`34` deprecates Python 3.6 and adds support for Python 3.10. + +## 0.1.0 - 2021-07-20 + +- {pull}`19` adds `conda-forge` to the `README.rst`. +- {pull}`22` add note that the debugger cannot be used together with pytask-parallel. +- {pull}`24` replaces versioneer with setuptools-scm. +- {pull}`25` aborts build and prints reports on `KeyboardInterrupt`. +- {pull}`27` enables rich tracebacks from subprocesses. + +## 0.0.8 - 2021-03-05 + +- {pull}`17` fixes the unidentifiable version. + +## 0.0.7 - 2021-03-04 + +- {pull}`14` fixes some post-release issues. +- {pull}`16` add dependencies to `setup.py` and changes the default backend to `loky`. + +## 0.0.6 - 2021-02-27 + +- {pull}`12` replaces all occurrences of `n_processes` with `n_workers`. +- {pull}`13` adds a license, versioneer, and allows publishing on PyPI. + +## 0.0.5 - 2020-12-28 + +- {pull}`5` fixes the CI and other smaller issues. +- {pull}`8` aligns pytask-parallel with task priorities in pytask v0.0.11. +- {pull}`9` enables --max-failures. Closes {issue}`7`. +- {pull}`10` releases v0.0.5. + +## 0.0.4 - 2020-10-30 + +- {pull}`4` implement an executor with `loky`. + +## 0.0.3 - 2020-09-12 + +- {pull}`3` align the program with pytask v0.0.6. + +## 0.0.2 - 2020-08-12 + +- {pull}`1` prepares the plugin for pytask v0.0.5. +- {pull}`2` better parsing and callbacks. + +## 0.0.1 - 2020-07-17 + +- Initial commit which combined the whole effort to release v0.0.1. diff --git a/CHANGES.rst b/CHANGES.rst deleted file mode 100644 index 3565e3e..0000000 --- a/CHANGES.rst +++ /dev/null @@ -1,84 +0,0 @@ -Changes -======= - -This is a record of all past pytask-parallel releases and what went into them in reverse -chronological order. Releases follow `semantic versioning `_ and -all releases are available on `PyPI `_ and -`Anaconda.org `_. - -0.1.2 - 2022-xx-xx ------------------- - -- :gh:`36` adds a test for https://github.com/pytask-dev/pytask/issues/216. - - -0.1.1 - 2022-02-08 ------------------- - -- :gh:`30` removes unnecessary content from ``tox.ini``. -- :gh:`33` skips concurrent CI builds. -- :gh:`34` deprecates Python 3.6 and adds support for Python 3.10. - - -0.1.0 - 2021-07-20 ------------------- - -- :gh:`19` adds ``conda-forge`` to the ``README.rst``. -- :gh:`22` add note that the debugger cannot be used together with pytask-parallel. -- :gh:`24` replaces versioneer with setuptools-scm. -- :gh:`25` aborts build and prints reports on ``KeyboardInterrupt``. -- :gh:`27` enables rich tracebacks from subprocesses. - - -0.0.8 - 2021-03-05 ------------------- - -- :gh:`17` fixes the unidentifiable version. - - -0.0.7 - 2021-03-04 ------------------- - -- :gh:`14` fixes some post-release issues. -- :gh:`16` add dependencies to ``setup.py`` and changes the default backend to ``loky``. - - -0.0.6 - 2021-02-27 ------------------- - -- :gh:`12` replaces all occurrences of ``n_processes`` with ``n_workers``. -- :gh:`13` adds a license, versioneer, and allows publishing on PyPI. - - -0.0.5 - 2020-12-28 ------------------- - -- :gh:`5` fixes the CI and other smaller issues. -- :gh:`8` aligns pytask-parallel with task priorities in pytask v0.0.11. -- :gh:`9` enables --max-failures. Closes :gh:`7`. -- :gh:`10` releases v0.0.5. - - -0.0.4 - 2020-10-30 ------------------- - -- :gh:`4` implement an executor with ``loky``. - - -0.0.3 - 2020-09-12 ------------------- - -- :gh:`3` align the program with pytask v0.0.6. - - -0.0.2 - 2020-08-12 ------------------- - -- :gh:`1` prepares the plugin for pytask v0.0.5. -- :gh:`2` better parsing and callbacks. - - -0.0.1 - 2020-07-17 ------------------- - -- Initial commit which combined the whole effort to release v0.0.1. diff --git a/MANIFEST.in b/MANIFEST.in index 5f6dd4e..de254c6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,12 +1,9 @@ -prune .conda prune tests -exclude *.rst +exclude *.md exclude *.yaml exclude *.yml exclude tox.ini -include README.rst +include README.md include LICENSE -include versioneer.py -include src/pytask_parallel/_version.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..51dd775 --- /dev/null +++ b/README.md @@ -0,0 +1,96 @@ +# pytask-parallel + +[![PyPI](https://img.shields.io/pypi/v/pytask-parallel?color=blue)](https://pypi.org/project/pytask-parallel) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pytask-parallel)](https://pypi.org/project/pytask-parallel) +[![image](https://img.shields.io/conda/vn/conda-forge/pytask-parallel.svg)](https://anaconda.org/conda-forge/pytask-parallel) +[![image](https://img.shields.io/conda/pn/conda-forge/pytask-parallel.svg)](https://anaconda.org/conda-forge/pytask-parallel) +[![PyPI - License](https://img.shields.io/pypi/l/pytask-parallel)](https://pypi.org/project/pytask-parallel) +[![image](https://img.shields.io/github/workflow/status/pytask-dev/pytask-parallel/Continuous%20Integration%20Workflow/main)](https://github.com/pytask-dev/pytask-parallel/actions?query=branch%3Amain) +[![image](https://codecov.io/gh/pytask-dev/pytask-parallel/branch/main/graph/badge.svg)](https://codecov.io/gh/pytask-dev/pytask-parallel) +[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/pytask-dev/pytask-parallel/main.svg)](https://results.pre-commit.ci/latest/github/pytask-dev/pytask-parallel/main) +[![image](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +______________________________________________________________________ + +Parallelize the execution of tasks with `pytask-parallel` which is a plugin for +[pytask](https://github.com/pytask-dev/pytask). + +## Installation + +pytask-parallel is available on [PyPI](https://pypi.org/project/pytask-parallel) and +[Anaconda.org](https://anaconda.org/conda-forge/pytask-parallel). Install it with + +```console +$ pip install pytask-parallel + +# or + +$ conda install -c conda-forge pytask-parallel +``` + +By default, the plugin uses `loky`'s robust implementation of the `ProcessPoolExecutor`. + +It is also possible to select the `ProcessPoolExecutor` or `ThreadPoolExecutor` from the +[concurrent.futures](https://docs.python.org/3/library/concurrent.futures.html) module +as backends to execute tasks asynchronously. + +## Usage + +To parallelize your tasks across many workers, pass an integer greater than 1 or +`'auto'` to the command-line interface. + +```console +$ pytask -n 2 +$ pytask --n-workers 2 + +# Starts os.cpu_count() - 1 workers. +$ pytask -n auto +``` + +Using processes to parallelize the execution of tasks is useful for CPU bound tasks such +as numerical computations. ([Here](https://stackoverflow.com/a/868577/7523785) is an +explanation on what CPU or IO bound means.) + +For IO bound tasks, tasks where the limiting factor are network responses, access to +files, you can parallelize via threads. + +```console +$ pytask --parallel-backend threads +``` + +You can also set the options in a `pyproject.toml`. + +```toml +# This is the default configuration. Note that, parallelization is turned off. + +[tool.pytask.ini_options] +n_workers = 1 +parallel_backend = loky # or processes or threads +``` + +## Warning + +It is not possible to combine parallelization with debugging. That is why `--pdb` or +`--trace` deactivate parallelization. + +If you parallelize the execution of your tasks using two or more workers, do not use +`breakpoint()` or `import pdb; pdb.set_trace()` since both will cause exceptions. + +## Changes + +Consult the [release notes](CHANGES.md) to find out about what is new. + +## Development + +- `pytask-parallel` does not call the `pytask_execute_task_protocol` hook + specification/entry-point because `pytask_execute_task_setup` and + `pytask_execute_task` need to be separated from `pytask_execute_task_teardown`. Thus, + plugins which change this hook specification may not interact well with the + parallelization. + +- There are two PRs for CPython which try to re-enable setting custom reducers which + should have been working, but does not. Here are the references. + + > - + > - + > - diff --git a/README.rst b/README.rst deleted file mode 100644 index c468e89..0000000 --- a/README.rst +++ /dev/null @@ -1,132 +0,0 @@ -.. image:: https://img.shields.io/pypi/v/pytask-parallel?color=blue - :alt: PyPI - :target: https://pypi.org/project/pytask-parallel - -.. image:: https://img.shields.io/pypi/pyversions/pytask-parallel - :alt: PyPI - Python Version - :target: https://pypi.org/project/pytask-parallel - -.. image:: https://img.shields.io/conda/vn/conda-forge/pytask-parallel.svg - :target: https://anaconda.org/conda-forge/pytask-parallel - -.. image:: https://img.shields.io/conda/pn/conda-forge/pytask-parallel.svg - :target: https://anaconda.org/conda-forge/pytask-parallel - -.. image:: https://img.shields.io/pypi/l/pytask-parallel - :alt: PyPI - License - :target: https://pypi.org/project/pytask-parallel - -.. image:: https://img.shields.io/github/workflow/status/pytask-dev/pytask-parallel/Continuous%20Integration%20Workflow/main - :target: https://github.com/pytask-dev/pytask-parallel/actions?query=branch%3Amain - -.. image:: https://codecov.io/gh/pytask-dev/pytask-parallel/branch/main/graph/badge.svg - :target: https://codecov.io/gh/pytask-dev/pytask-parallel - -.. image:: https://results.pre-commit.ci/badge/github/pytask-dev/pytask-parallel/main.svg - :target: https://results.pre-commit.ci/latest/github/pytask-dev/pytask-parallel/main - :alt: pre-commit.ci status - -.. image:: https://img.shields.io/badge/code%20style-black-000000.svg - :target: https://github.com/psf/black - - ------- - -pytask-parallel -=============== - -Parallelize the execution of tasks with ``pytask-parallel`` which is a plugin for -`pytask `_. - - -Installation ------------- - -pytask-parallel is available on `PyPI `_ and -`Anaconda.org `_. Install it with - -.. code-block:: console - - $ pip install pytask-parallel - - # or - - $ conda install -c conda-forge pytask-parallel - -By default, the plugin uses ``loky``'s robust implementation of the -``ProcessPoolExecutor``. - -It is also possible to select the ``ProcessPoolExecutor`` or ``ThreadPoolExecutor`` from -the `concurrent.futures `_ -module as backends to execute tasks asynchronously. - - -Usage ------ - -To parallelize your tasks across many workers, pass an integer greater than 1 or -``'auto'`` to the command-line interface. - -.. code-block:: console - - $ pytask -n 2 - $ pytask --n-workers 2 - - # Starts os.cpu_count() - 1 workers. - $ pytask -n auto - - -Using processes to parallelize the execution of tasks is useful for CPU bound tasks such -as numerical computations. (`Here `_ is an -explanation on what CPU or IO bound means.) - -For IO bound tasks, tasks where the limiting factor are network responses, access to -files, you can parallelize via threads. - -.. code-block:: console - - $ pytask --parallel-backend threads - -You can also set the options in one of the configuration files (``pytask.ini``, -``tox.ini``, or ``setup.cfg``). - -.. code-block:: ini - - # This is the default configuration. Note that, parallelization is turned off. - - [pytask] - n_workers = 1 - parallel_backend = loky # or processes or threads - - -Warning -------- - -It is not possible to combine parallelization with debugging. That is why ``--pdb`` or -``--trace`` deactivate parallelization. - -If you parallelize the execution of your tasks using two or more workers, do not use -``breakpoint()`` or ``import pdb; pdb.set_trace()`` since both will cause exceptions. - - -Changes -------- - -Consult the `release notes `_ to find out about what is new. - - -Development ------------ - -- ``pytask-parallel`` does not call the ``pytask_execute_task_protocol`` hook - specification/entry-point because ``pytask_execute_task_setup`` and - ``pytask_execute_task`` need to be separated from ``pytask_execute_task_teardown``. - Thus, plugins which change this hook specification may not interact well with the - parallelization. - -- There are two PRs for CPython which try to re-enable setting custom reducers which - should have been working, but does not. Here are the references. - - + https://bugs.python.org/issue28053 - + https://github.com/python/cpython/pull/9959 - + https://github.com/python/cpython/pull/15058 diff --git a/environment.yml b/environment.yml index c7347c3..e6ab11d 100644 --- a/environment.yml +++ b/environment.yml @@ -16,30 +16,15 @@ dependencies: - conda-verify # Package dependencies - - pytask >= 0.1.0 + - pytask >=0.2.0 - cloudpickle - loky - - # pytask package dependencies - - attrs >=17.4.0 - - click - - click-default-group - - networkx >=2.4 - - pluggy - - pony >=0.7.13 - - rich + - pybaum >=0.1.1 # Misc - black - - bump2version - - jupyterlab - - matplotlib - - pdbpp - pre-commit - pytest-cov - # Documentation - - numpydoc - - pydata-sphinx-theme - - sphinx - - sphinx-copybutton + - pip: + - -e . diff --git a/pyproject.toml b/pyproject.toml index c42ef4b..72554a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,6 @@ [build-system] requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"] +build-backend = "setuptools.build_meta" [tool.setuptools_scm] diff --git a/setup.cfg b/setup.cfg index 37285d5..83a4627 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,8 @@ [metadata] name = pytask_parallel description = Parallelize the execution of tasks with pytask. -long_description = file: README.rst -long_description_content_type = text/x-rst +long_description = file: README.md +long_description_content_type = text/markdown url = https://github.com/pytask-dev/pytask-parallel author = Tobias Raabe author_email = raabe@posteo.de @@ -10,7 +10,7 @@ license = MIT license_file = LICENSE platforms = any classifiers = - Development Status :: 3 - Alpha + Development Status :: 4 - Beta License :: OSI Approved :: MIT License Operating System :: OS Independent Programming Language :: Python :: 3 @@ -20,7 +20,7 @@ classifiers = Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 project_urls = - Changelog = https://github.com/pytask-dev/pytask-parallel/blob/main/CHANGES.rst + Changelog = https://github.com/pytask-dev/pytask-parallel/blob/main/CHANGES.md Documentation = https://github.com/pytask-dev/pytask-parallel Github = https://github.com/pytask-dev/pytask-parallel Tracker = https://github.com/pytask-dev/pytask-parallel/issues @@ -31,7 +31,8 @@ install_requires = click cloudpickle loky - pytask>=0.1.7 + pybaum>=0.1.1 + pytask>=0.2 python_requires = >=3.7 include_package_data = True package_dir = =src diff --git a/setup.py b/setup.py deleted file mode 100644 index fa283bc..0000000 --- a/setup.py +++ /dev/null @@ -1,6 +0,0 @@ -from __future__ import annotations - -from setuptools import setup - -if __name__ == "__main__": - setup() diff --git a/src/pytask_parallel/backends.py b/src/pytask_parallel/backends.py index d6b06d2..8828bef 100644 --- a/src/pytask_parallel/backends.py +++ b/src/pytask_parallel/backends.py @@ -14,8 +14,8 @@ try: from loky import get_reusable_executor - - PARALLEL_BACKENDS["loky"] = get_reusable_executor - PARALLEL_BACKENDS_DEFAULT = "loky" except ImportError: pass +else: + PARALLEL_BACKENDS["loky"] = get_reusable_executor + PARALLEL_BACKENDS_DEFAULT = "loky" diff --git a/src/pytask_parallel/build.py b/src/pytask_parallel/build.py index 4dcd2e6..043b807 100644 --- a/src/pytask_parallel/build.py +++ b/src/pytask_parallel/build.py @@ -2,7 +2,7 @@ from __future__ import annotations import click -from _pytask.config import hookimpl +from pytask import hookimpl from pytask_parallel.backends import PARALLEL_BACKENDS from pytask_parallel.backends import PARALLEL_BACKENDS_DEFAULT diff --git a/src/pytask_parallel/config.py b/src/pytask_parallel/config.py index 6e90d1c..e1303ec 100644 --- a/src/pytask_parallel/config.py +++ b/src/pytask_parallel/config.py @@ -2,9 +2,10 @@ from __future__ import annotations import os +from typing import Any +from typing import Callable -from _pytask.config import hookimpl -from _pytask.shared import get_first_non_none_value +from pytask import hookimpl from pytask_parallel.backends import PARALLEL_BACKENDS_DEFAULT from pytask_parallel.callbacks import delay_callback from pytask_parallel.callbacks import n_workers_callback @@ -14,7 +15,7 @@ @hookimpl def pytask_parse_config(config, config_from_cli, config_from_file): """Parse the configuration.""" - config["n_workers"] = get_first_non_none_value( + config["n_workers"] = _get_first_non_none_value( config_from_cli, config_from_file, key="n_workers", @@ -24,7 +25,7 @@ def pytask_parse_config(config, config_from_cli, config_from_file): if config["n_workers"] == "auto": config["n_workers"] = max(os.cpu_count() - 1, 1) - config["delay"] = get_first_non_none_value( + config["delay"] = _get_first_non_none_value( config_from_cli, config_from_file, key="delay", @@ -32,7 +33,7 @@ def pytask_parse_config(config, config_from_cli, config_from_file): callback=delay_callback, ) - config["parallel_backend"] = get_first_non_none_value( + config["parallel_backend"] = _get_first_non_none_value( config_from_cli, config_from_file, key="parallel_backend", @@ -46,3 +47,20 @@ def pytask_post_parse(config): """Disable parallelization if debugging is enabled.""" if config["pdb"] or config["trace"]: config["n_workers"] = 1 + + +def _get_first_non_none_value( + *configs: dict[str, Any], + key: str, + default: Any | None = None, + callback: Callable[..., Any] | None = None, +) -> Any: + """Get the first non-None value for a key from a list of dictionaries. + + This function allows to prioritize information from many configurations by changing + the order of the inputs while also providing a default. + + """ + callback = (lambda x: x) if callback is None else callback # noqa: E731 + processed_values = (callback(config.get(key)) for config in configs) + return next((value for value in processed_values if value is not None), default) diff --git a/src/pytask_parallel/execute.py b/src/pytask_parallel/execute.py index f8f93b2..dd31958 100644 --- a/src/pytask_parallel/execute.py +++ b/src/pytask_parallel/execute.py @@ -1,15 +1,17 @@ """Contains code relevant to the execution.""" from __future__ import annotations +import inspect import sys import time from typing import Any import cloudpickle -from _pytask.config import hookimpl -from _pytask.console import console -from _pytask.report import ExecutionReport -from _pytask.traceback import remove_internal_traceback_frames_from_exc_info +from pybaum.tree_util import tree_map +from pytask import console +from pytask import ExecutionReport +from pytask import hookimpl +from pytask import remove_internal_traceback_frames_from_exc_info from pytask_parallel.backends import PARALLEL_BACKENDS from rich.console import ConsoleOptions from rich.traceback import Traceback @@ -18,9 +20,9 @@ @hookimpl def pytask_post_parse(config): """Register the parallel backend.""" - if config["parallel_backend"] in ["loky", "processes"]: + if config["parallel_backend"] in ("loky", "processes"): config["pm"].register(ProcessesNameSpace) - elif config["parallel_backend"] in ["threads"]: + elif config["parallel_backend"] in ("threads",): config["pm"].register(DefaultBackendNameSpace) @@ -148,16 +150,23 @@ def pytask_execute_task(session, task): # noqa: N805 """ if session.config["n_workers"] > 1: - bytes_ = cloudpickle.dumps(task) + kwargs = _create_kwargs_for_task(task) + + bytes_function = cloudpickle.dumps(task) + bytes_kwargs = cloudpickle.dumps(kwargs) + return session.executor.submit( _unserialize_and_execute_task, - bytes_=bytes_, + bytes_function=bytes_function, + bytes_kwargs=bytes_kwargs, show_locals=session.config["show_locals"], console_options=console.options, ) -def _unserialize_and_execute_task(bytes_, show_locals, console_options): +def _unserialize_and_execute_task( + bytes_function, bytes_kwargs, show_locals, console_options +): """Unserialize and execute task. This function receives bytes and unpickles them to a task which is them execute @@ -166,10 +175,11 @@ def _unserialize_and_execute_task(bytes_, show_locals, console_options): """ __tracebackhide__ = True - task = cloudpickle.loads(bytes_) + task = cloudpickle.loads(bytes_function) + kwargs = cloudpickle.loads(bytes_kwargs) try: - task.execute() + task.execute(**kwargs) except Exception: exc_info = sys.exc_info() processed_exc_info = _process_exception(exc_info, show_locals, console_options) @@ -199,4 +209,18 @@ def pytask_execute_task(session, task): # noqa: N805 """ if session.config["n_workers"] > 1: - return session.executor.submit(task.execute) + kwargs = _create_kwargs_for_task(task) + return session.executor.submit(task.execute, **kwargs) + + +def _create_kwargs_for_task(task): + """Create kwargs for task function.""" + kwargs = {**task.kwargs} + + func_arg_names = set(inspect.signature(task.function).parameters) + for arg_name in ("depends_on", "produces"): + if arg_name in func_arg_names: + attribute = getattr(task, arg_name) + kwargs[arg_name] = tree_map(lambda x: x.value, attribute) + + return kwargs diff --git a/src/pytask_parallel/logging.py b/src/pytask_parallel/logging.py index 1bb8ca0..1fbbfc1 100644 --- a/src/pytask_parallel/logging.py +++ b/src/pytask_parallel/logging.py @@ -1,8 +1,8 @@ """Contains code relevant to logging.""" from __future__ import annotations -from _pytask.config import hookimpl -from _pytask.console import console +from pytask import console +from pytask import hookimpl @hookimpl(trylast=True) diff --git a/src/pytask_parallel/plugin.py b/src/pytask_parallel/plugin.py index b315e8c..5fa661f 100644 --- a/src/pytask_parallel/plugin.py +++ b/src/pytask_parallel/plugin.py @@ -1,7 +1,7 @@ """Entry-point for the plugin.""" from __future__ import annotations -from _pytask.config import hookimpl +from pytask import hookimpl from pytask_parallel import build from pytask_parallel import config from pytask_parallel import execute diff --git a/tests/test_cli.py b/tests/test_cli.py index c3a8cab..f9cb0ec 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -5,6 +5,7 @@ import pytest from pytask import cli +from pytask import ExitCode @pytest.mark.end_to_end @@ -22,5 +23,5 @@ def task_1(produces): result = runner.invoke(cli, [tmp_path.as_posix(), "-n", "2", "--delay", "5"]) end = time() - assert result.exit_code == 0 + assert result.exit_code == ExitCode.OK assert end - start > 5 diff --git a/tests/test_config.py b/tests/test_config.py index eaadc58..faca99f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -4,6 +4,7 @@ import textwrap import pytest +from pytask import ExitCode from pytask import main from pytask_parallel.backends import PARALLEL_BACKENDS @@ -25,35 +26,48 @@ def test_interplay_between_debugging_and_parallel(tmp_path, pdb, n_workers, expe @pytest.mark.end_to_end -@pytest.mark.parametrize("config_file", ["pytask.ini", "tox.ini", "setup.cfg"]) +@pytest.mark.parametrize( + "config_file", ["pytask.ini", "tox.ini", "setup.cfg", "pyproject.toml"] +) @pytest.mark.parametrize( "configuration_option, value, exit_code", [ - ("n_workers", "auto", 0), - ("n_workers", 1, 0), - ("n_workers", 2, 0), - ("delay", 0.1, 0), - ("delay", 1, 0), - ("parallel_backend", "unknown_backend", 2), + ("n_workers", "auto", ExitCode.OK), + ("n_workers", 1, ExitCode.OK), + ("n_workers", 2, ExitCode.OK), + ("delay", 0.1, ExitCode.OK), + ("delay", 1, ExitCode.OK), + ("parallel_backend", "unknown_backend", ExitCode.CONFIGURATION_FAILED), ] + [ - ("parallel_backend", parallel_backend, 0) + ("parallel_backend", parallel_backend, ExitCode.OK) for parallel_backend in PARALLEL_BACKENDS ], ) def test_reading_values_from_config_file( - tmp_path, config_file, configuration_option, value, exit_code + tmp_path, capsys, config_file, configuration_option, value, exit_code ): - config = f""" - [pytask] - {configuration_option} = {value} - """ + if config_file == "pyproject.toml": + config = f""" + [tool.pytask.ini_options] + {configuration_option} = {value!r} + """ + else: + config = f""" + [pytask] + {configuration_option} = {value} + """ tmp_path.joinpath(config_file).write_text(textwrap.dedent(config)) session = main({"paths": tmp_path}) + captured = capsys.readouterr() + if config_file == "pyproject.toml": + assert "WARNING" not in captured.out + else: + assert "WARNING" in captured.out assert session.exit_code == exit_code if value == "auto": value = os.cpu_count() - 1 - if session.exit_code == 0: + if session.exit_code == ExitCode.OK: assert session.config[configuration_option] == value diff --git a/tests/test_execute.py b/tests/test_execute.py index e91dc68..8f8f65f 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -2,25 +2,19 @@ import pickle import textwrap +from pathlib import Path from time import time -import attr import pytest from pytask import cli +from pytask import ExitCode from pytask import main +from pytask import Task from pytask_parallel.backends import PARALLEL_BACKENDS from pytask_parallel.execute import DefaultBackendNameSpace from pytask_parallel.execute import ProcessesNameSpace -@attr.s -class DummyTask: - function = attr.ib() - - def execute(self): - self.function() - - class Session: pass @@ -46,7 +40,7 @@ def task_2(produces): session = main({"paths": tmp_path}) - assert session.exit_code == 0 + assert session.exit_code == ExitCode.OK assert session.execution_end - session.execution_start > 10 tmp_path.joinpath("out_1.txt").unlink() @@ -56,7 +50,7 @@ def task_2(produces): {"paths": tmp_path, "n_workers": 2, "parallel_backend": parallel_backend} ) - assert session.exit_code == 0 + assert session.exit_code == ExitCode.OK assert session.execution_end - session.execution_start < 10 @@ -83,7 +77,7 @@ def task_2(produces): result = runner.invoke(cli, [tmp_path.as_posix()]) end = time() - assert result.exit_code == 0 + assert result.exit_code == ExitCode.OK assert end - start > 10 tmp_path.joinpath("out_1.txt").unlink() @@ -102,7 +96,7 @@ def task_2(produces): ) end = time() - assert result.exit_code == 0 + assert result.exit_code == ExitCode.OK assert "Started 2 workers." in result.output assert end - start < 10 @@ -118,7 +112,7 @@ def myfunc(): with pytest.raises(AttributeError): pickle.dumps(myfunc) - task = DummyTask(myfunc) + task = Task(base_name="task_example", path=Path(), function=myfunc) session = Session() session.config = { @@ -169,6 +163,7 @@ def task_2(produces): } ) + assert session.exit_code == ExitCode.OK assert 3 < session.execution_end - session.execution_start < 10 @@ -183,8 +178,7 @@ def task_1(): time.sleep(1) def task_2(): time.sleep(2); raise NotImplementedError @pytask.mark.try_last - def task_3(): - time.sleep(3) + def task_3(): time.sleep(3) """ tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source)) @@ -197,6 +191,7 @@ def task_3(): } ) + assert session.exit_code == ExitCode.FAILED assert len(session.tasks) == 3 assert len(session.execution_reports) == 2 @@ -210,25 +205,25 @@ def test_task_priorities(tmp_path, parallel_backend): @pytask.mark.try_first def task_0(): - time.sleep(1) + time.sleep(0.1) def task_1(): - time.sleep(1) + time.sleep(0.1) @pytask.mark.try_last def task_2(): - time.sleep(1) + time.sleep(0.1) @pytask.mark.try_first def task_3(): - time.sleep(1) + time.sleep(0.1) def task_4(): - time.sleep(1) + time.sleep(0.1) @pytask.mark.try_last def task_5(): - time.sleep(1) + time.sleep(0.1) """ tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source)) @@ -236,7 +231,7 @@ def task_5(): {"paths": tmp_path, "parallel_backend": parallel_backend, "n_workers": 2} ) - assert session.exit_code == 0 + assert session.exit_code == ExitCode.OK first_task_name = session.execution_reports[0].task.name assert first_task_name.endswith("task_0") or first_task_name.endswith("task_3") last_task_name = session.execution_reports[-1].task.name @@ -263,7 +258,7 @@ def task_raising_error(): args.append("--show-locals") result = runner.invoke(cli, args) - assert result.exit_code == 1 + assert result.exit_code == ExitCode.FAILED assert "───── Traceback" in result.output assert ("───── locals" in result.output) is show_locals assert ("[0, 1, 2, 3, 4]" in result.output) is show_locals @@ -292,4 +287,4 @@ def task_example(produces): {"paths": tmp_path, "parallel_backend": parallel_backend, "n_workers": 2} ) - assert session.exit_code == 0 + assert session.exit_code == ExitCode.OK