diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..b4a4668e7 --- /dev/null +++ b/.flake8 @@ -0,0 +1,71 @@ +[flake8] +count = True +show-source = True +doctests = True + +ignore = + # Missing docstring in __init__ + D107 + + # blank-line after doc summaries (annoying for modules' doc) + D205 + + # conflicts with D211: No blank lines allowed before class docstring + D203 + + # do not enforce first-line-period at module docs + D400 + + # conflicts with E133: closing bracket is missing indentation + E123 + + # Line break after binary operator + W504 + + # Invalid escape sequence + W605 + + # Local variable never used + F841 + + # Use of == / != to compare constant literals + F632 + + # First line should be imperative mood + D401 + + # Do not perform function calls in argument defaults. + B008 + + # Do not call getattr with a constant attribute value, it is not any safer than normal property access + B009 + + # Do not call assert False since python -O removes these calls. Instead callers should raise AssertionError(). + B011 + +exclude = + .tox, + .git, + __pycache__, + docs, + config, + build, + dist, + *.pyc, + *.egg-info, + .cache, + .eggs, + src/orion/core/_version.py, + src/orion/core/utils/_appdirs.py + +# Line length +max-line-length = 100 + +# McCabe complexity checker +max-complexity = 20 + +# flake8-import-order: style +import-order-style = google + +# flake8-import-order: local module name checker +application-import-names = orion, versioneer diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..c352d38d2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Found a bug? Tell us about it! +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Steps to reproduce** +Describe the steps to reproduce the bug, if applicable: + +**Environment (please complete the following information):** + - OS: [e.g. Ubuntu 18.04] + - Python version: [e.g. 3.7] + - Oríon version: [e.g. 0.1.7] + - Database: [e.g. PickleDB, MongoDB] + +**Additional context** +If applicable, attach any logs or screenshots that can help explain the problem. + +**Possible solution** +If you think you know what the problem is, let us know! Your opinion helps us. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..886df0239 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,26 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: feature request +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** + +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** + + - A clear and concise description of what you want to happen. + - Motivate how this feature fits [Oríon's mission](https://github.com/Epistimio/orion). + - Keep the scope as narrow as possible, to make it easier to implement. + +**Describe alternatives you've considered** + +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** + +Add any other details, context or screenshots about the feature request here.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..ab714dfdf --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,34 @@ +_Hi there! Thank you for contributing. Feel free to use this pull request template; it helps us review your work at its true value._ + +_Please remove the instructions in italics before posting the pull request :)._ + +# Description +_Describe the purpose of this pull request and why it should be integrated into the repository. +When your changes modify the current behavior, explain why your solution is better._ + +_If it solves a GitHub issue, be sure to [link it](https://help.github.com/en/github/writing-on-github/autolinked-references-and-urls#issues-and-pull-requests)._ + +# Changes +_Give an overview of the suggested solution._ + +# Checklist +_This is simply a reminder of what we are going to look for before merging your code._ + +_Add an `x` in the boxes that apply._ +_If you're unsure about any of them, don't hesitate to ask. We're here to help!_ +_You can also fill these out after creating the PR if it's a work in progress (if so, be sure to publish the PR as a draft)_ + +## Tests +- [ ] I added corresponding tests for bug fixes and new features. If possible, the tests fail without the changes +- [ ] All new and existing tests are passing (`$ tox -e py38`; replace `38` by your Python version if necessary) + +## Documentation +- [ ] I have updated the relevant documentation related to my changes + +## Quality +- [ ] I have read the [CONTRIBUTING](https://github.com/Epistimio/orion/blob/develop/CONTRIBUTING.md) doc +- [ ] My commit messages follow [this format](https://chris.beams.io/posts/git-commit/) +- [ ] My code follows the style guidelines (`$ tox -e lint`) + +# Further comments +_Please include any additional information or comments that you feel will be helpful to the review of this pull request._ diff --git a/.gitignore b/.gitignore index db370f1aa..2c6c0c3cf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *~ *.sw* .idea/ +.vscode/ .spyderproject .spyproject .ropeproject @@ -9,6 +10,9 @@ *.pkl *.lock +# OS generated files +.DS_Store + # StarUML documentation *.mdj diff --git a/.pylintrc b/.pylintrc index 2d27b6413..a2cf8aea8 100644 --- a/.pylintrc +++ b/.pylintrc @@ -51,7 +51,8 @@ confidence= # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=abstract-class-instantiated,useless-super-delegation,no-member,keyword-arg-before-vararg,unidiomatic-typecheck,redefined-outer-name,fixme,F0401,intern-builtin,wrong-import-position,wrong-import-order +disable=abstract-class-instantiated,useless-super-delegation,no-member,keyword-arg-before-vararg,unidiomatic-typecheck,redefined-outer-name,fixme,F0401,intern-builtin,wrong-import-position,wrong-import-order, + C0415, F0010, R0205, R1705, R1711, R1720, W0106, W0107, W0127, W0706 # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option @@ -390,7 +391,7 @@ max-locals=20 max-parents=7 # Maximum number of public methods for a class (see R0904).
-max-public-methods=10 +max-public-methods=12 # Maximum number of return / yield for function / method body max-returns=6 diff --git a/.travis.yml b/.travis.yml index ea0d6c42a..e4e8a35b7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,9 +21,6 @@ jobs: after_success: - tox -e final-coverage - tox -e codecov - env: TOXENV=py35 - python: 3.5 - - <<: *test env: TOXENV=py36 python: 3.6 - <<: *test @@ -34,7 +31,12 @@ jobs: - <<: *test env: TOXENV=py37 python: 3.7 - dist: xenial + dist: bionic + sudo: true + - <<: *test + env: TOXENV=py38 + python: 3.8 + dist: bionic sudo: true - <<: *test env: TOXENV=demo-random diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..9cae30b63 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,55 @@ +# Contributing to Oríon +Hi there 👋. Thank you for considering contributing to Oríon 🎉. We're excited to have you here! + +We present the guidelines for contributing to the project. They are not hard rules, use your best judgment, and feel free to propose changes to this document in a pull request. If you haven't already, it's a good idea to quickly pass through our [code of conduct](https://github.com/Epistimio/orion/blob/develop/CODE_OF_CONDUCT.md) to ensure everyone has a good time. + +## Where do I go from here? +If you have a question, found a bug or have a feature request, you're welcome to open a new issue at https://github.com/Epistimio/orion/issues. It's generally best if you get confirmation of your bug or approval for your feature request before starting to code. + +Depending on what you want to do, we suggest you read the respective guidelines: +- [Asking a question](#how-to-ask-a-question) +- [Reporting a bug](#how-to-report-a-bug) +- [Proposing enhancements](#how-to-propose-enhancements) +- [Submitting changes](#how-to-submit-changes) + +If you want to add a new algorithm to Oríon, make sure to check out the [plugin documentation](https://orion.readthedocs.io/en/latest/plugins/base.html). + +### How to ask a question +Asking a question is also a contribution to the project! We'll be happy to help you if you have any questions. Before opening a new issue, make sure to do a quick search. It's possible your question has already been answered! Otherwise, go ahead. We're looking forward to helping you. + +### How to report a bug +You found a bug? Great! Before submitting, make sure you're experiencing the bug on the latest version of Oríon and that it's not already reported in our issue tracker. + +Please use the [bug report template](https://github.com/Epistimio/orion/issues/new/choose) to open a new bug; make sure to choose a clear and descriptive title for your issue. + +### How to propose enhancements +We're thrilled to hear you found a way to make Oríon better, whether through minor improvements or completely new features! +Before creating enhancement suggestions, please check that your idea is not already present in the list of issues. You might find that you don't need to create one and can just join in the discussion directly and give your opinion! + +Please use the [feature request template](https://github.com/Epistimio/orion/issues/new/choose) to propose changes; make sure to choose a clear and descriptive title for your issue. + +### How to submit changes +We're grateful you're considering making changes to Oríon! + +To get started, you need to first fork the repository and then create a new branch on your fork where you'll make your changes.
+ +Once you have implemented the changes, make sure to [rebase](https://www.atlassian.com/git/tutorials/rewriting-history/git-rebase) your branch on the latest version of Oríon's *develop* branch and finally submit a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests). +All changes to Oríon are done through PRs, where they will be peer-reviewed and checked against our continuous integration system to ensure the quality of the code base. + +During this process, keep in mind to: +- Set a descriptive and short branch name +- Create a descriptive and clear title for the PR +- Motivate the *why* and *how* of the PR (get inspiration from current or past PRs!) +- Write [good commits](https://chris.beams.io/posts/git-commit/) +- Include tests for new features or bug fixes +- Update the source code documentation (docstring) and, if applicable, the [general documentation](https://orion.readthedocs.io/en/latest/index.html) + +Next, you need to get familiar with the developer documentation. You will find the instructions to set up your development environment, including how to use the test suite and verify your changes will pass the CI, as well as our style guides for source code and documentation. + +-> https://orion.readthedocs.io/en/latest/developer/overview.html + +#### Your first contribution +If you are not sure about what to work on first, we suggest you take a look at the open [help wanted](https://github.com/Epistimio/orion/labels/help%20wanted) and [good first issues](https://github.com/Epistimio/orion/labels/good%20first%20issue). They are great places to start! + +--- +Thank you for contributing! We really appreciate it! diff --git a/LICENSE b/LICENSE index c0f91404a..980ddbd08 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ Software License Agreement (BSD License) - Copyright (c) 2017-2018, Epistímio. + Copyright (c) 2017-2020, Epistímio. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/MANIFEST.in b/MANIFEST.in index dc7887d05..36ca3d68a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,7 @@ include LICENSE include *.rst include ROADMAP.md +include CONTRIBUTING.md exclude RESEARCH.md # Control and setup helpers @@ -13,15 +14,19 @@ exclude .travis.yml exclude tox.ini exclude *-requirements.txt exclude .pylintrc +exclude .flake8 exclude codecov.yml exclude .mailmap prune conda/ # Include src, tests, docs -recursive-include docs *.rst *.py *.gitkeep +recursive-include docs *.rst *.py *.gitkeep *.png include docs/requirements.txt prune docs/build prune docs/src/reference recursive-include src *.py recursive-include tests *.py *.yaml *.json *.txt *.yml *.in LICENSE include tests/requirements.txt + +# Include examples +recursive-include examples *.md *.py */requirements.txt diff --git a/README.rst b/README.rst index 4a9ee2fca..35dc8b392 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,8 @@ Oríon ***** -|pypi| |py_versions| |license| |rtfd| |codecov| |travis| +|pypi| |py_versions| |license| |doi| +|rtfd| |codecov| |travis| .. |pypi| image:: https://img.shields.io/pypi/v/orion.svg :target: https://pypi.python.org/pypi/orion @@ -16,8 +17,12 @@ Oríon :target: https://opensource.org/licenses/BSD-3-Clause :alt: BSD 3-clause license -.. |rtfd| image:: https://readthedocs.org/projects/orion/badge/?version=latest - :target: https://orion.readthedocs.io/en/latest/?badge=latest +..
|doi| image:: https://zenodo.org/badge/102697867.svg + :target: https://zenodo.org/badge/latestdoi/102697867 + :alt: DOI + +.. |rtfd| image:: https://readthedocs.org/projects/orion/badge/?version=stable + :target: https://orion.readthedocs.io/en/stable/?badge=stable :alt: Documentation Status .. |codecov| image:: https://codecov.io/gh/Epistimio/orion/branch/master/graph/badge.svg :target: https://codecov.io/gh/Epistimio/orion @@ -43,53 +48,44 @@ now what you have to do looks like this: ``orion -n experiment_name ./run.py --mini-batch~'randint(32, 256)'`` -Check out `user's guide-101`_ for the simplest of demonstrations! - -.. _user's guide-101: https://orion.readthedocs.io/en/latest/user/pytorch.html - -Features -======== -*As simple and as complex you want* - -- Simple and natural, but also explicit and verbose, search domain definitions -- Minimal and non-intrusive client interface for reporting - target function values -- Database logging (currently powered by MongoDB_) -- Flexible configuration -- Explicit experiment termination conditions -- Algorithms algorithms algorithms: - Skopt_'s bayesian optimizers are at hand without writing. - Random search is the default. - **only** a single line of code. -- More algorithms: - Implementing and distributing algorithms is as easy as possible! - Check `developer's guide-101`_. Expect algorithm plugins to pop out quickly! -- Came up with an idea? - Your intuition is still at play: - Help your optima hunter now by a command line interface. -- And other many more already there or coming soon! - -.. _MongoDB: https://www.mongodb.com/ -.. _Skopt: https://scikit-optimize.github.io/ -.. _developer's guide-101: https://orion.readthedocs.io/en/latest/developer/testing.html +Check out our `getting started guide`_ or `this presentation +`_ +for an overview, or our `scikit-learn example`_ for a more hands-on experience. Finally, we encourage you +to browse our `documentation`_. + +.. _getting started guide: https://orion.readthedocs.io/en/stable/install/gettingstarted.html +.. _documentation: https://orion.readthedocs.io/ +.. _scikit-learn example: https://orion.readthedocs.io/en/stable/tutorials/scikit-learn.html + +Why Oríon? +========== +*Effortless to adopt, deeply customizable* + +- `Adopt it `_ with a single line of code +- `Natively asynchronous `_, thus resilient and easy to parallelize +- Offers the latest established hyperparameter `algorithms `_ +- Elegant and rich `search-space definitions `_ +- Comprehensive `configuration `_ system with smart defaults +- Transparent persistence in local or remote `database `_ +- `Integrate seamlessly `_ your own + hyper-optimization algorithms +- `Language `_ + and `configuration file `_ agnostic Installation ============ -Install Oríon by running: - -``pip install orion`` - -For more information read the `full installation docs`_. +Install Oríon by running ``$ pip install orion``. For more information consult the `installation +guide`_. -.. _full installation docs: https://orion.readthedocs.io/en/latest/install/core.html +.. _installation guide: https://orion.readthedocs.io/en/stable/install/core.html Contribute or Ask ================= Do you have a question or an issue? Do you want to report a bug or suggest a feature? Name it! -Please contact us by opening an issue in our repository below: +Please contact us by opening an issue in our repository below and check out our `contribution guidelines `_: - Issue Tracker: ``_ - Source Code: ``_ @@ -98,12 +94,42 @@ Start by starring and forking our Github repo! Thanks for the support!
-Roadmap +Citation ======== -You can find our roadmap here: ``_ +If you use Oríon for published work, please cite our work using the following BibTeX entry. + +.. code-block:: bibtex + + @software{xavier_bouthillier_2019_3478593, + author = {Xavier Bouthillier and + Christos Tsirigotis and + François Corneau-Tremblay and + Pierre Delaunay and + Reyhane Askari and + Dendi Suhubdy and + Michael Noukhovitch and + Dmitriy Serdyuk and + Arnaud Bergeron and + Peter Henderson and + Pascal Lamblin and + Mirko Bronzi and + Christopher Beckham}, + title = {Oríon - Asynchronous Distributed Hyperparameter Optimization}, + month = oct, + year = 2019, + publisher = {Zenodo}, + version = {v0.1.8}, + doi = {10.5281/zenodo.3478592}, + url = {https://doi.org/10.5281/zenodo.3478592} + } + +Roadmap +======= + +See `ROADMAP.md `_. License ======= -The project is licensed under the BSD license. +The project is licensed under the `BSD license `_. diff --git a/ROADMAP.md b/ROADMAP.md index cea67cc5b..9577d1e8f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,24 +1,20 @@ # Roadmap -Last update October 8th, 2019 +Last update February 25th, 2020 ## Next releases - Short-Term +### v0.1.9 +#### Default database backend +PickledDB will become the default database backend instead of MongoDB. This initiative reduces +initial configuration, allowing users to get started faster. -### v0.1.8 +#### Deleting experiments +Adding a new (much-requested) feature enabling the deletion of experiments from the CLI or Python +API. -#### Preliminary Python API -Library API to simplify usage of algorithms without Oríon's worker. - -#### Journal Protocol Plugins -Offering: -- no need to setup DB, can use one's existing backend -- Can re-use tools provided by backend for visualizations, etc. - -## Next releases - Mid-Term - -### v0.2: ETA End of summer 2019 +### v0.2 #### Journal Protocol Plugins Offering: -- no need to setup DB, can use one's existing backend +- No need to setup DB, can use one's existing backend - Can re-use tools provided by backend for visualizations, etc. #### Python API @@ -34,21 +30,30 @@ results = dummy(**trial.arguments) experiment.observe(trial, results) ``` +### Algorithms +Introducing new algorithms: [TPE](https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf), [HyperBand](https://arxiv.org/abs/1603.06560) + +## Next releases - Mid-Term + ### v0.3 #### Generic `Optimizer` interface supporting various types of algorithms Change interface to support trial object instead of curated lists. This is necessary to support algorithms such as PBT. #### More Optimizers -- PBT -- BOHB - -## Next releases - Long-Term +- [PBT](https://arxiv.org/abs/1711.09846) +- [BOHB](https://ml.informatik.uni-freiburg.de/papers/18-ICML-BOHB.pdf) #### Simple dashboard specific to monitoring and benchmarking of Black-Box optimization - Specific to hyper parameter optimizations - Provide status of experiments +#### Leveraging previous experiences +Leveraging the knowledge base contained in the EVC of previous trials to optimize and drive new + trials. + +## Next releases - Long-Term + #### Conditional Space The Space class will be refactored on top of [ConfigSpace](https://automl.github.io/ConfigSpace). This will give access to [conditional parameters](https://automl.github.io/ConfigSpace/master/Guide.html#nd-example-categorical-hyperparameters-and-conditions) and [forbidden clauses](https://automl.github.io/ConfigSpace/master/Guide.html#rd-example-forbidden-clauses).
diff --git a/codecov.yml b/codecov.yml index 799a0af4c..9eba7c166 100644 --- a/codecov.yml +++ b/codecov.yml @@ -40,7 +40,8 @@ coverage: default: branches: null - ignore: null + ignore: + - tests/* fixes: - .tox diff --git a/conda/conda_build.sh b/conda/conda_build.sh index b4352e26c..5ae822ba9 100755 --- a/conda/conda_build.sh +++ b/conda/conda_build.sh @@ -8,13 +8,17 @@ bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" hash -r conda config --set always_yes yes --set changeps1 no + +pip uninstall -y setuptools +conda install -c anaconda setuptools + conda update -q conda conda info -a conda install conda-build anaconda-client -conda build conda --python 3.5 conda build conda --python 3.6 conda build conda --python 3.7 +conda build conda --python 3.8 if [[ -n "${TRAVIS_TAG}" ]] then diff --git a/dev-requirements.txt b/dev-requirements.txt index 0656b6785..8f78281a9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,9 +1,2 @@ +# The only dev package necessary is `tox` in order to run the different environments used for testing, linting and releasing. tox -flake8 == 3.5.0 -flake8-import-order == 0.15 -flake8-docstrings == 1.1.0 -flake8-bugbear == 17.4.0 -pylint == 1.8.1 -doc8 == 0.8.0 -check-manifest -readme_renderer diff --git a/docs/src/_resources/one.png b/docs/src/_resources/one.png new file mode 100644 index 000000000..7b3ede31a Binary files /dev/null and b/docs/src/_resources/one.png differ diff --git a/docs/src/_resources/synchronization.png b/docs/src/_resources/synchronization.png new file mode 100644 index 000000000..c2b6b7d7c Binary files /dev/null and b/docs/src/_resources/synchronization.png differ diff --git a/docs/src/code/algo.rst b/docs/src/code/algo.rst index 0cac9ca9d..b9ca1216c 100644 --- a/docs/src/code/algo.rst +++ b/docs/src/code/algo.rst @@ -14,4 +14,6 @@ TODO algo/space algo/base algo/random + algo/hyperband algo/asha + algo/tpe diff --git a/docs/src/code/algo/hyperband.rst b/docs/src/code/algo/hyperband.rst new file mode 100644 index 000000000..93405b50b --- /dev/null +++ b/docs/src/code/algo/hyperband.rst @@ -0,0 +1,5 @@ +Hyperband Algorithm +=================== + +.. automodule:: orion.algo.hyperband + :members: diff --git a/docs/src/code/algo/tpe.rst b/docs/src/code/algo/tpe.rst new file mode 100644 index 000000000..dbbebf517 --- /dev/null +++ b/docs/src/code/algo/tpe.rst @@ -0,0 +1,5 @@ +TPE Algorithm +============= + +.. automodule:: orion.algo.tpe + :members: diff --git a/docs/src/code/client.rst b/docs/src/code/client.rst index b7060c3a5..82dc287ab 100644 --- a/docs/src/code/client.rst +++ b/docs/src/code/client.rst @@ -6,6 +6,8 @@ Client helper functions :maxdepth: 1 :caption: Modules + client/cli + client/experiment client/manual .. automodule:: orion.client diff --git a/docs/src/code/client/cli.rst b/docs/src/code/client/cli.rst new file mode 100644 index 000000000..bc1143cab --- /dev/null +++ b/docs/src/code/client/cli.rst @@ -0,0 +1,5 @@ +Commandline client +================== + +.. automodule:: orion.client.cli + :members: diff --git a/docs/src/code/client/experiment.rst b/docs/src/code/client/experiment.rst new file mode 100644 index 000000000..8f0ef5890 --- /dev/null +++ b/docs/src/code/client/experiment.rst @@ -0,0 +1,5 @@ +Experiment client +================= +..
automodule:: orion.client.experiment + :members: diff --git a/docs/src/code/core/io.rst b/docs/src/code/core/io.rst index 5935dd6e5..a722a2ab1 100644 --- a/docs/src/code/core/io.rst +++ b/docs/src/code/core/io.rst @@ -15,5 +15,4 @@ Input/Output io/resolve_config io/experiment_builder io/experiment_branch_builder - io/evc_builder io/interactive_commands diff --git a/docs/src/code/core/io/evc_builder.rst b/docs/src/code/core/io/evc_builder.rst deleted file mode 100644 index 69acef4a8..000000000 --- a/docs/src/code/core/io/evc_builder.rst +++ /dev/null @@ -1,5 +0,0 @@ -Experiment Version Control Tree Builder -======================================= - -.. automodule:: orion.core.io.evc_builder - :members: diff --git a/docs/src/code/core/utils.rst b/docs/src/code/core/utils.rst index ec3739832..0fb14fab2 100644 --- a/docs/src/code/core/utils.rst +++ b/docs/src/code/core/utils.rst @@ -6,6 +6,7 @@ Utilities :caption: Utility modules utils/format_trials + utils/format_terminal .. automodule:: orion.core.utils :members: diff --git a/docs/src/code/core/utils/format_terminal.rst b/docs/src/code/core/utils/format_terminal.rst new file mode 100644 index 000000000..cc03c7636 --- /dev/null +++ b/docs/src/code/core/utils/format_terminal.rst @@ -0,0 +1,5 @@ +Format terminal +=============== + +.. automodule:: orion.core.utils.format_terminal + :members: diff --git a/docs/src/code/storage.rst b/docs/src/code/storage.rst new file mode 100644 index 000000000..ece83ce71 --- /dev/null +++ b/docs/src/code/storage.rst @@ -0,0 +1,11 @@ +**************** +Storage Protocol +**************** + +.. toctree:: + :maxdepth: 1 + :caption: Modules + + storage/base + storage/legacy + storage/track diff --git a/docs/src/code/storage/base.rst b/docs/src/code/storage/base.rst new file mode 100644 index 000000000..595212fea --- /dev/null +++ b/docs/src/code/storage/base.rst @@ -0,0 +1,5 @@ +Base +==== + +.. automodule:: orion.storage.base + :members: diff --git a/docs/src/code/storage/legacy.rst b/docs/src/code/storage/legacy.rst new file mode 100644 index 000000000..c36e8a1f1 --- /dev/null +++ b/docs/src/code/storage/legacy.rst @@ -0,0 +1,5 @@ +Legacy Database +=============== + +.. automodule:: orion.storage.legacy + :members: diff --git a/docs/src/code/storage/track.rst b/docs/src/code/storage/track.rst new file mode 100644 index 000000000..7af467835 --- /dev/null +++ b/docs/src/code/storage/track.rst @@ -0,0 +1,5 @@ +Track +===== + +.. automodule:: orion.storage.track + :members: diff --git a/docs/src/developer/ci.rst b/docs/src/developer/ci.rst new file mode 100644 index 000000000..1241d1048 --- /dev/null +++ b/docs/src/developer/ci.rst @@ -0,0 +1,33 @@ +********************** +Continuous Integration +********************** +.. image:: https://travis-ci.org/epistimio/orion.svg?branch=master + :target: https://travis-ci.org/epistimio/orion + +.. image:: https://codecov.io/gh/epistimio/orion/branch/master/graphs/badge.svg?branch=master + :target: https://codecov.io/gh/epistimio/orion + +We use travis-ci_ and codecov_ for continuous integration and tox_ to automate the process at +the repository level. + +When a commit is pushed to a pull request, a call to ``$ tox`` is made by +TravisCI, which triggers the following chain of events: + +#. A test environment is spun up for each version of Python tested (defined in ``tox.ini``). +#. Code style verifications and quality checks are run (``flake8``, ``pylint``, ``doc8``). The + documentation is also built at this time (``docs``). +#.
The test suite is run completely with coverage, including the dedicated backward + compatibility tests. +#. The structure of the repository is validated by ``check-manifest`` and ``readme_renderer``. +#. The results of the coverage check are reported directly in the pull request. + +The coverage results show the difference in coverage introduced by the changes. We always aim to +have changes that improve coverage. + +If a step fails at any point in any environment, the build will be immediately stopped, marked as +failed and reported to the pull request and repository. In such a case, the maintainers and +relevant contributors will be alerted. + +.. _codecov: https://codecov.io/ +.. _travis-ci: https://travis-ci.com/ +.. _tox: https://tox.readthedocs.io/en/latest/ diff --git a/docs/src/developer/documenting.rst b/docs/src/developer/documenting.rst index 2be0e1e9d..678c61305 100644 --- a/docs/src/developer/documenting.rst +++ b/docs/src/developer/documenting.rst @@ -1,28 +1,35 @@ -.. contents:: Developer's Guide 102: Documenting +*********** +Documenting +*********** +The documentation is built using Sphinx_ with the `Read The Docs`_ theme. -******** -Document -******** +We try to write the documentation in only one place and reuse it as much as possible. For instance, +the home page of this documentation (https://orion.readthedocs.io/) is actually pulled +from the README.rst and appended with an automatically generated table of contents of the +documentation. The advantage of having a single source of truth is that it's vastly easier to find +information and keep it up to date. -We are using `Read The Docs`_ theme for Sphinx_. +Updating README.rst +=================== -Run tox command:: +When you need to reference a page from the documentation on the README.rst, make sure to always +point to the **stable** channel in readthedocs (https://orion.readthedocs.io/en/stable/). - tox -e docs - -to build *html* and *man* pages for documentation +If you need to add a link to a specific page in the documentation that is not yet on the stable +channel, make the link point to the **latest** channel (https://orion.readthedocs.io/en/latest/). During +the :doc:`release process ` the link will be updated to the stable channel. -Also by executing:: +Building documentation +====================== - tox -e serve-docs +To generate the *html* and *man* pages of the documentation, run: -the page under ``/docs/build/html`` is hosted by *localhost*. +.. code-block:: sh -Use also command:: - - tox -e doc8 + tox -e docs -to ensure that documentation standards are being followed. +When writing, you can run ``$ tox -e serve-docs`` to host the content of +``/docs/build/html`` on http://localhost:8000. .. _Read The Docs: https://sphinx-rtd-theme.readthedocs.io/en/latest/ .. _Sphinx: http://www.sphinx-doc.org/en/master/ diff --git a/docs/src/developer/installing.rst b/docs/src/developer/installing.rst new file mode 100644 index 000000000..979bd302d --- /dev/null +++ b/docs/src/developer/installing.rst @@ -0,0 +1,58 @@ +*************** +Getting started +*************** +In this section, we'll guide you through installing the dependencies and environment needed to develop on Oríon. +We did our best to automate most of the process using Python's ecosystem goodness to facilitate +your onboarding. Let us know how we can improve! + +Oríon +===== +The first step is to clone your remote repository from GitHub (if you haven't already, make sure to +fork our repository_ first). + +..
code-block:: sh + + $ git clone https://github.com/epistimio/orion.git + +.. tip:: + + The usage of a `virtual environment`_ is recommended, but not necessary. + + .. code-block:: sh + + $ mkvirtualenv -a $PWD/orion orion + +Then, you need to deploy the project in `development mode`_ by invoking the ``setup.py`` script with +the ``develop`` argument or by using ``pip install --editable``. + +.. code-block:: sh + + $ python setup.py develop --optimize=1 + +Database +======== +Follow the same instructions as in :ref:`install_database` to set up a database locally. + +Verifying the installation +========================== +For developers' convenience, the packages listed in the requirements file +``dev-requirements.txt`` are meant to facilitate the development process. +Packages include `tox `_ for defining +and organizing macros of sh commands in virtual environments, and packages +for linting as we will see in a later chapter. + +Check that everything is ready by running the test suite with Python 3.6 using ``$ tox -e py36`` (this will +take some time). If the tests can't be run to completion, contact us by opening a `new issue +`_. We'll do our best to help you! + +About tox +========= +tox_ is an automation tool that executes tasks in virtual environments. We automate all our testing, +verification, and release macros with it. All contexts are defined in +`tox.ini `_. They can be executed using +``$ tox -e ``. + +.. _tox: https://tox.readthedocs.io/en/latest/ +.. _repository: https://github.com/epistimio/orion +.. _virtual environment: https://virtualenvwrapper.readthedocs.io/en/latest/command_ref.html#mkvirtualenv +.. _development mode: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode diff --git a/docs/src/developer/overview.rst b/docs/src/developer/overview.rst new file mode 100644 index 000000000..26110cd28 --- /dev/null +++ b/docs/src/developer/overview.rst @@ -0,0 +1,21 @@ +******** +Overview +******** + +Welcome to the project. We're excited you decided to improve Oríon! + +Hopefully, by now you are familiar with our `contribution guide `_ and `code of conduct `_. +The documentation for developers is organized in the following easy-to-read sections: + +* :doc:`Getting Started `. Installing the development environment +* :doc:`Conventions `. Get familiar with the project's standards and guidelines +* :doc:`Testing `. Implementing your changes and how to test your code +* :doc:`Documenting `. Documenting your changes and updating the documentation +* :doc:`Continuous Integration `. Get familiar with our continuous integration setup +* :doc:`Releasing a new version `. How to create and ship a new release + +When you're done, don't forget to `rebase `_ +your changes on the latest version of the *develop* branch of https://github.com/Epistimio/orion. +Then, submit your `pull request `_ and ask for a review when the build is passing in the CI. + +If you have any questions or suggestions, feel free to reach out to us by email or by `opening an issue `_. diff --git a/docs/src/developer/release.rst b/docs/src/developer/release.rst new file mode 100644 index 000000000..afc744153 --- /dev/null +++ b/docs/src/developer/release.rst @@ -0,0 +1,71 @@ +********* +Releasing +********* + +In this document, we describe the procedure used to release a new version of Oríon to the public. +Release artifacts are distributed through PyPI_. + +Creating a release candidate +============================ +The first step in releasing a new version is to create a release candidate.
A release candidate +allows us to thoroughly test the new version and iron out the remaining bugs. It's +also at this time that we change the version number and update related documentation +such as the README.rst. + +#. Create a new branch from the *develop* branch named ``release-{version}rc``, where + ``{version}`` is replaced by the number of the new version (e.g., ``1.2.0``). This effectively + freezes the feature set for this new version, while allowing regular development to continue taking + place in the *develop* branch. More information is available in :ref:`standard-vcs`. +#. In README.rst, replace any link pointing to ``https://orion.readthedocs.io/en/latest/**`` with + ``https://orion.readthedocs.io/en/stable/**``. +#. Create a new pull request for the branch created in the previous step and list all the changes by + category. Example: https://github.com/Epistimio/orion/pull/283. +#. Update the **Citation** section in the project's README.rst with the latest version of Oríon. +#. Update the ``ROADMAP.md``. +#. Update the linters ``flake8``, ``pylint``, and ``doc8`` to their latest versions in ``tox.ini``, + and address any new errors. +#. Run the stress tests according to the instructions in the stress tests' documentation. + +.. _release-make: + +Making the release +================== +Once the release candidate is thoroughly tested and the core contributors are confident in it, it's +time to create the release artifacts and publish the release. + +#. Merge the release candidate branch into master (no fast-forward merge, we want a merge commit). +#. Create a `new draft release ` on GitHub. Set the + target branch to *master* and the tag version to ``v{version}``. Reuse the changelog from the + release candidate pull request for the description. See the `0.1.6 + `_ version example. +#. Merge the master branch back into develop. +#. Delete the release candidate branch. + +Publishing the release +====================== +Once the release is correctly documented and integrated into the VCS workflow, we can publish it to +the public. + +* Publish the GitHub release. The source code archives will be added automatically by GitHub to the + release. +* Publish the new version to PyPI_ by executing ``$ tox -e release`` from the tagged commit on the + master branch. + +After the release +================= +Once published, it's important to notify our user base and community that a new version exists so +they can update their version or learn about Oríon. + +* Verify Oríon's Zenodo_ page has been updated to reflect the new release on GitHub_. Zenodo is + configured to automatically create a new version whenever a new release is published on GitHub. +* Verify Oríon's Anaconda_ page contains the new version. Binaries for the new version are uploaded + automatically by TravisCI when the tests pass for the merge commit tagged with the new version on + the master branch. +* Announce the new release on the #orion Slack channel. +* Announce the new release on relevant communication channels (e.g., email, forums, Google groups) +* Congratulations, you published a new version of Oríon! + +.. _GitHub: https://github.com/Epistimio/orion/releases +.. _Zenodo: https://doi.org/10.5281/zenodo.3478592 +.. _PyPI: https://pypi.org/project/orion/ +..
_Anaconda: https://anaconda.org/epistimio/orion diff --git a/docs/src/developer/standards.rst b/docs/src/developer/standards.rst index 613eb97d2..88bc3defc 100644 --- a/docs/src/developer/standards.rst +++ b/docs/src/developer/standards.rst @@ -1,57 +1,230 @@ -.. contents:: Developer's Guide 103: Contribution Standards +*********** +Conventions +*********** -********** -Contribute -********** +In this chapter, we present the different standards and guidelines we use throughout the project. +All the conventions are enforced automatically during the PR process. -Coding and Repository Standards -=============================== +You can verify locally that your code will pass the checks by running ``$ tox -e lint`` (which +is the equivalent of ``$ tox -e flake8,pylint,doc8,packaging``). -We are using flake8_ (along with some of its plugins) and pylint_. -Their styles are provided in ``/tox.ini`` and ``/.pylintrc`` respectively. +.. _standard-coding: -.. code-block:: sh +Coding standard +=============== - tox -e flake8 - tox -e pylint +Our coding standards are specified via flake8_ and pylint_. Their configurations are provided in +``tox.ini`` and ``.pylintrc`` respectively. You can verify the conformity of your changes locally +by running ``$ tox -e flake8`` and ``$ tox -e pylint``. -Also, we are using a check-manifest_ which compares ``/MANIFEST.in`` and git -structure of the source repository, and finally readme_renderer_ which -checks whether ``/README.rst`` can be -actually rendered in PyPI_ website page for Oríon. +In addition, we follow `Numpy's docstring standards +`_ to ensure good +documentation quality for the project. -.. code-block:: sh +.. _standard-vcs: - tox -e packaging +Version Control Guidelines +========================== -To run all of expected linters execute:: +To collaborate through VCS, we follow the +`gitflow `_ +workflow. The *develop* and *master* branches are protected and can only be changed with pull +requests. - tox -e lint +For branch names, we recommend prefixing the name of the branch with *feature/*, *fix/*, or +*doc/* depending on the change you're implementing. Additionally, we encourage adding the issue's id +(if there is one) at the start of the branch name, after the prefix. For example, the branch name for +a bug represented by issue 225 would be ``fix/225-short-bug-description``. -.. _flake8: http://flake8.pycqa.org/en/latest/ -.. _pylint: https://www.pylint.org/ -.. _check-manifest: https://pypi.org/project/check-manifest/ -.. _readme_renderer: https://pypi.org/project/readme_renderer/ -.. _PyPI: https://pypi.org/ +When creating a release, we use the pattern *release-{version}rc*. This branch represents the release +candidate that will be merged into the master branch when the changes are ready to be launched in +production. + +Synchronization +--------------- +Regarding merges, we recommend you keep your changes in your forked repository for as long as +possible and rebase your branch on Oríon's develop branch before submitting a pull request. + +Most probably, the develop branch will have changed by the time your pull request is approved. In +such cases, we recommend that you merge the changes from develop into your branch when the reviewer +approves your pull request and then the maintainer will merge your branch into develop, closing your +pull request. + +We discourage rebases after the pull request has been submitted as it can cause problems in +GitHub's review system which makes it lose track of the comments on the pull request.
On another +note, merges are always done with the creation of a merge commit, also known as a *non fast-forward +merge*. + +In some cases where the pull request embodies contributions which are scattered across multiple +commits containing incremental changes (e.g., ``fix pep8``, ``update based on feedback``), the pull +request may be integrated into the development branch using `squash and merge `_ +by the maintainer to avoid clutter. It is strongly encouraged to make small pull requests. They are +simpler to implement, easier to integrate and faster to review. + +.. _standard-documenting: + +Documenting standard +==================== + +Our documentation standard is upheld via doc8_. You can verify your documentation modifications by +running ``$ tox -e doc8``. The information about writing and generating documentation is available +in the :doc:`documenting` chapter. + +.. _standard-repository: + +Repository standard +=================== + +We are using check-manifest_ to ensure no file is missing when we distribute our application, and we +also use readme_renderer_, which checks whether ``/README.rst`` can be rendered on PyPI_. +You can verify these locally by running ``$ tox -e packaging``. + +Versioning standard +=================== + +We follow the `semantic versioning `_ convention to name the versions of Oríon. +While in beta, we prepend a ``0`` on the left of the major version. + +GitHub labels +============= + +The labels are divided into a few categories. +The objective is to have precise labels while staying lean. +Each category is identified with a color. +Bold colors are used for tags that should be easy to find when looking at the issues. + +Topic +----- + +Specifies an area in the software or meta concerns. + +* |documentation|_ +* |tests|_ + +.. |documentation| replace:: ``documentation`` +.. |tests| replace:: ``tests`` +.. _documentation: https://github.com/Epistimio/orion/labels/documentation +.. _tests: https://github.com/Epistimio/orion/labels/tests + +Addition +-------- + +Specifies a new feature. + +* |feature|_ + +.. |feature| replace:: ``feature`` +.. _feature: https://github.com/Epistimio/orion/labels/feature -Project Structure +Improvement +----------- -TODO +Improves a feature or non-functional aspects (e.g., optimization, prettifying, technical debt) -Source Repository Structure -=========================== +* |enhancement|_ -TODO +.. |enhancement| replace:: ``enhancement`` +.. _enhancement: https://github.com/Epistimio/orion/labels/enhancement -Fork and Pull Request -===================== +Problems +-------- -Fork Oríon remotely to your Github_ account now, and start by submitting a -`Pull Request `_ to us or by -discussing an `issue `_ with us. +Indicates an unexpected problem or unintended behavior -.. image:: https://img.shields.io/github/forks/epistimio/orion.svg?style=social&label=Fork - :target: https://github.com/epistimio/orion/network +* |bug|_ + +.. |bug| replace:: ``bug`` +.. _bug: https://github.com/Epistimio/orion/labels/bug + +Status +------ + +Status of the issue + +* |blocked|_ +* |in progress|_ +* |in review|_ + +.. |blocked| replace:: ``blocked`` +.. _blocked: https://github.com/Epistimio/orion/labels/blocked +.. |in progress| replace:: ``in progress`` +.. _in progress: https://github.com/Epistimio/orion/labels/in%20progress +.. |in review| replace:: ``in review`` +..
_in review: https://github.com/Epistimio/orion/labels/in%20review + +Discussion +---------- + +Questions or feedback about the project + +* |user question|_ +* |dev question|_ +* |feedback|_ + +.. |user question| replace:: ``user question`` +.. _user question: https://github.com/Epistimio/orion/labels/user%20question +.. |dev question| replace:: ``dev question`` +.. _dev question: https://github.com/Epistimio/orion/labels/dev%20question +.. |feedback| replace:: ``feedback`` +.. _feedback: https://github.com/Epistimio/orion/labels/feedback + +Community +--------- + +Related to the community, calls to action + +* |help wanted|_ +* |good first issue|_ + +.. |help wanted| replace:: ``help wanted`` +.. _help wanted: https://github.com/Epistimio/orion/labels/help%20wanted +.. |good first issue| replace:: ``good first issue`` +.. _good first issue: https://github.com/Epistimio/orion/labels/good%20first%20issue + +Priority +-------- + +Qualifies priority bugs and features. +This category enables the maintainers to identify which issues should be addressed first. +Each label has a different shade based on the priority. + +* |critical|_ +* |high|_ +* |medium|_ +* |low|_ + +.. |critical| replace:: ``critical`` +.. _critical: https://github.com/Epistimio/orion/labels/critical +.. |high| replace:: ``high`` +.. _high: https://github.com/Epistimio/orion/labels/high +.. |medium| replace:: ``medium`` +.. _medium: https://github.com/Epistimio/orion/labels/medium +.. |low| replace:: ``low`` +.. _low: https://github.com/Epistimio/orion/labels/low + +Inactive +-------- + +No action needed or possible. The issue is either fixed or addressed + +* |on hold|_ +* |wont fix|_ +* |duplicate|_ +* |invalid|_ + +.. |on hold| replace:: ``on hold`` +.. _on hold: https://github.com/Epistimio/orion/labels/on%20hold +.. |wont fix| replace:: ``wont fix`` +.. _wont fix: https://github.com/Epistimio/orion/labels/wont%20fix +.. |duplicate| replace:: ``duplicate`` +.. _duplicate: https://github.com/Epistimio/orion/labels/duplicate +.. |invalid| replace:: ``invalid`` +.. _invalid: https://github.com/Epistimio/orion/labels/invalid .. _Github: https://github.com +.. _flake8: http://flake8.pycqa.org/en/latest/ +.. _doc8: https://pypi.org/project/doc8/ +.. _pylint: https://www.pylint.org/ +.. _check-manifest: https://pypi.org/project/check-manifest/ +.. _readme_renderer: https://pypi.org/project/readme_renderer/ +.. _PyPI: https://pypi.org/ diff --git a/docs/src/developer/stress.rst b/docs/src/developer/stress.rst new file mode 100644 index 000000000..80accf67b --- /dev/null +++ b/docs/src/developer/stress.rst @@ -0,0 +1,49 @@ + +Stress Tests +============ + +There are many potential scenarios leading to race conditions in Oríon due to the +use of multiple concurrent workers attempting to generate or reserve trials in parallel. + +While these race conditions are tested in unit tests with hand-crafted scenarios, it is impossible +to ensure test coverage of all possible scenarios. +The stress tests aim at pushing Oríon to its limit, increasing the chances of +uncovering bugs hidden from the unit tests. Being resource intensive, the stress tests +are not part of the continuous integration pipeline that is executed for every +contribution to the code. The stress tests should be run before every release, with the +resulting plots shared on the pull request of the release candidate.
Core members +reviewing a pull request may run the stress tests if there are reasonable doubts about the effect of the +contribution on the efficiency of Oríon. + +Execution +--------- + +The stress tests are included in ``tox``; you can therefore run all stress tests +with the simple command ``$ tox -e stress``. If you intend to run the tests directly without +using ``tox``, make sure to install the additional dependencies of the stress tests +with ``$ pip install -r tests/stress/requirements.txt``. + +The stress tests include sleeps to simulate workloads in the different workers; nevertheless, +it is advisable to execute them on a machine with sufficient cores to run many workers in +parallel. The results provided below were obtained on a machine with 16 cores, for instance. + +Results +------- + +The tests verify the coherence of the results, validating that no race conditions were +improperly handled, leading to corrupted entries. The absence of errors does not imply the code base +is free of bugs, but stress tests are very likely to uncover them. + +The tests verify that the number of trials for the completed experiments is exactly equal to +``max_trials`` and that all points are different. +Additionally, each worker reports the number of trials it executed. The total +number of executed trials is matched against ``max_trials``. + +Finally, the benchmarking of the different scenarios, for different numbers of workers, +different database backends and different search spaces (discrete or real), is plotted +in a file ``test.png``, as reported below. Numerical results are also saved in a file +``test.json``. + +.. figure:: test.png + :scale: 75 % + :alt: Results of the stress tests diff --git a/docs/src/developer/test.png b/docs/src/developer/test.png new file mode 100644 index 000000000..e4da9d6d6 Binary files /dev/null and b/docs/src/developer/test.png differ diff --git a/docs/src/developer/testing.rst b/docs/src/developer/testing.rst index 251be15bd..1b283bdac 100644 --- a/docs/src/developer/testing.rst +++ b/docs/src/developer/testing.rst @@ -1,79 +1,62 @@ -.. contents:: Developer's Guide 101: Tools & Testing - ******* Testing ******* -For developer's convenience the packages enlisted in the requirements file -``dev-requirements.txt`` are meant to facilitate the development process. -Packages include `tox `_ for defining -and organizing macros of sh commands in virtual environments, and packages -for linting as we will see in a next chapter. - - -Continuous Integration -====================== - -We use **TravisCI** and **CodeCov**. - -.. image:: https://travis-ci.org/epistimio/orion.svg?branch=master - :target: https://travis-ci.org/epistimio/orion - -.. image:: https://codecov.io/gh/epistimio/orion/branch/master/graphs/badge.svg?branch=master - :target: https://codecov.io/gh/epistimio/orion - -Continuous Testing -================== +All the tests for our software are located and organized in the directory +``/tests`` relative to the root of the code repository. There are three kinds of +tests: -Using ``tox`` we can automate many processes of continuous testing into macros. -All contexts are defined in `/tox.ini `_. +#. Unit tests, located under ``/tests/unittests``. + They test individual features, often at the class granularity. +#. Functional tests, located under ``/tests/functional``. + They test end-to-end functionality, invoking Oríon's executable from the shell. +#. Stress tests, located under ``/tests/stress``. + They test resilience and performance.
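+
+As an illustration, here is a hedged sketch of what a small unit test in this suite can look
+like with pytest (the tests below are hypothetical and not part of the actual test suite; they
+assume the ``Real`` dimension class from ``orion.algo.space`` and its ``sample`` method):
+
+.. code-block:: python
+
+    from orion.algo.space import Real
+
+    def test_sampled_point_is_in_dimension():
+        """A point sampled from a dimension should lie inside its interval."""
+        dim = Real("lr", "uniform", 0.0, 1.0)  # uniform prior on [0, 1]
+        point = dim.sample()[0]
+        assert point in dim
+
+    def test_point_outside_interval_is_rejected():
+        """A point outside the interval should not be contained."""
+        dim = Real("lr", "uniform", 0.0, 1.0)
+        assert 2.0 not in dim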
-By calling:: + +The tests are written with pytest_. We highly recommend you check it out and take a look at the +existing tests in the ``tests`` folder. - tox +We recommend invoking the tests using ``tox`` as this will be the method used by the CI system. +It will save you headaches when trying to run tests and nasty surprises when submitting PRs. -one attempts to call all contexts that matter for our Continuous Integration in -the same call. Those are *py35*, *py36*, *py37* for running tests and -checking coverage, *flake8*, *pylint*, *doc8*, *packaging* for linting code, -documentation and Python packaging-related files, and finally *docs* for -building the Sphinx documentation. +Running tests +============= +To run the complete test suite, you can use .. code-block:: sh - tox -e py + $ tox -e py This will run the tests with the Python executable of the current shell environment. If the tests are successful, then a code **coverage** summary will be printed on the shell's screen. -.. code-block:: sh +However, during development consider using - tox -e devel +.. code-block:: sh -This will finally always run the tests on background and on a code change event, -it automatically performs **regression testing**. + $ tox -e devel -Test -==== +This will run in the background and automatically run the tests whenever a code change +event occurs (e.g., you save a file). It's particularly useful when you also +specify the location of the tests: -All the tests for our software are located and organized in the directory -``/tests`` relative to the root of the code repository. There are two kinds of -tests: Unit tests are located under ``/tests/unittests`` and functional tests -(tests which invoke Oríon's executable from shell) under ``/tests/functional``. +.. code-block:: sh -Our software requires pytest_ ``>=3.0.0`` for automated testing. -Also, it requires the particular database setup described in -:doc:`/install/database` to have been followed. + $ tox -e devel -- 'path/to/your/tests/' -Hence the tests can be invoked with:: .. code-block:: sh - python setup.py test + $ tox -e devel -- 'path/to/your/tests/file.py' -For instance:: .. code-block:: sh - python setup.py test --addopts 'tests/unittests' + $ tox -e devel -- 'path/to/your/tests/file.py::test_name' -will only execute tests located under ``/tests/unittests``, this is all unit -tests. +This way, the tests will be run automatically every time you make a change in the specified folder, +file, or test respectively. This option is also available for ``$ tox -e py``. .. _pytest: https://docs.pytest.org/en/latest/ + + +.. include:: stress.rst diff --git a/docs/src/index.rst b/docs/src/index.rst index a57ddf19d..518122d64 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -1,51 +1,58 @@ .. include:: ../../README.rst - .. toctree:: - :caption: Installation Guide + :caption: Welcome :maxdepth: 1 + install/gettingstarted install/core install/database - install/plugins - +.. contributors .. toctree:: - :caption: User's Guide + :caption: User Guide :maxdepth: 1 - user/pytorch - user/monitoring - user/searchspace - user/algorithms + user/overview user/script + user/api + user/algorithms + user/searchspace + user/monitoring user/evc + user/storage + user/config + user/parallel ..
toctree:: - :caption: Examples + :caption: Tutorials :maxdepth: 1 - examples/pytorch_cifar - examples/pytorch_a2c_ppo - examples/cluster + tutorials/scikit-learn + tutorials/pytorch-mnist + tutorials/cluster + tutorials/pytorch_cifar + tutorials/pytorch_a2c_ppo .. toctree:: - :caption: Plugin development's Guide + :caption: Plugins :maxdepth: 1 plugins/base + plugins/install plugins/algorithms - plugins/database - plugins/analysis +.. toctree:: + :caption: Developer Guide + :maxdepth: 1 -.. .. toctree:: -.. :caption: Plan -.. roadmap -.. changelog -.. contributors - + developer/overview + developer/installing + developer/standards + developer/testing + developer/documenting + developer/ci + developer/release .. toctree:: :maxdepth: 2 @@ -54,13 +61,6 @@ code/core code/algo code/client - -.. toctree:: - :caption: Developer's Guide - :maxdepth: 1 - - developer/testing - developer/documenting - developer/standards + code/storage .. Don't fetch reference/viz diff --git a/docs/src/install/core.rst b/docs/src/install/core.rst index 081764942..a4721ebd5 100644 --- a/docs/src/install/core.rst +++ b/docs/src/install/core.rst @@ -1,59 +1,36 @@ -**************************** -Installation of Orion's core -**************************** +**************** +Installing Oríon +**************** -Oríon should work on most Linux distributions and Mac OS X. It is tested on Ubuntu 16.04 LTS and Mac -OS X 10.13. We do not support Windows and there is no short term plan to do so. +Oríon is compatible with most Linux distributions and Mac OS X. +Windows 10 is also supported through the `Windows Subsystem for Linux `_. +Oríon is tested on Ubuntu 16.04 LTS and Mac OS X 10.13. -Via PyPI -======== - -The easiest way to install Oríon is using the Python package manager. The core of Oríon is -registered on PyPI under `orion`. +The easiest way to install the latest version of Oríon is through the Python package manager. Oríon +is registered on PyPI_ under `orion`. Use the following command to install Oríon: .. code-block:: sh pip install orion -This will install all the core components. Note that the only algorithm provided with it -is random search. To install more algorithms, you can look at section :doc:`/install/plugins`. +Note that Oríon comes with the following algorithms: Random Search, Hyperband, TPE, and ASHA. More +algorithms are available in the :doc:`plugin section `; their installation is also +done through ``pip``. + +Afterwards, we recommend that you :doc:`select a database ` for Oríon to use unless +you're comfortable with the default option. -Via Git -======= - -This way is recommended if you want to work with the bleeding edge version -of Oríon. +.. _PyPI: https://pypi.org/project/orion/ -Recommended for users ---------------------- +Bleeding edge +============= + +If you want to work with the bleeding edge version of Oríon, we recommend you install it with the +following command: .. code-block:: sh pip install git+https://github.com/epistimio/orion.git@develop -Note that the bleeding-edge branch is develop. The master branch is the same as the latest version +Note that the bleeding edge branch is develop. The master branch is the same as the latest version on PyPI. - -Recommended for developers of Oríon ------------------------------------ - -Clone remote repository_ from Github, using *https* or *ssh*, and then -deploy the project in `development mode`_, by invoking the ``setup.py`` script -with ``develop`` argument -or by using ``pip install --editable``.
Usage of a `virtual environment`_ is -also recommended, but not necessary. Example: - -.. code-block:: sh - - - git clone https://github.com/epistimio/orion.git --branch develop - mkvirtualenv -a $PWD/orion orion - python setup.py develop --optimize=1 - - deactivate - -Begin reading instructions for developing it in :doc:`/developer/testing`. - -.. _repository: https://github.com/epistimio/orion -.. _virtual environment: https://virtualenvwrapper.readthedocs.io/en/latest/command_ref.html#mkvirtualenv -.. _development mode: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode diff --git a/docs/src/install/database.rst b/docs/src/install/database.rst index b0227694c..5fae8ff70 100644 --- a/docs/src/install/database.rst +++ b/docs/src/install/database.rst @@ -1,6 +1,8 @@ -************** -Setup Database -************** +.. _install_database: + +******** +Database +******** .. note:: @@ -21,10 +23,6 @@ not intending to use an already existing one, is necessary. We are going to follow an example of installing and setting up a minimal database locally. -.. note:: - - This is the same database required to be setup in order to run the tests. - Local MongoDB Installation ========================== diff --git a/docs/src/install/gettingstarted.rst b/docs/src/install/gettingstarted.rst new file mode 100644 index 000000000..993b40fad --- /dev/null +++ b/docs/src/install/gettingstarted.rst @@ -0,0 +1,151 @@ +*************** +Getting Started +*************** + +Welcome! In this chapter, we give a quick overview of Oríon's main features and how it can help you +streamline your machine learning workflow whether you are a researcher or engineer. + +Oríon is a black box function optimization library with a key focus on usability and integrability +for its users. For example, as a machine learning engineer, you can integrate Oríon into your +existing ML workflow to reliably handle the hyperparameter optimization and tuning phase. As an ML +researcher, you can use Oríon to tune your models, but also integrate your own algorithms into Oríon +so it serves as an efficient optimization engine, and compare them with other algorithms in the same +context and conditions. + +Conversely, Oríon does not aim to be a machine learning framework or pipeline, or an automatic +machine learning product. Oríon focuses essentially on black box optimization. However, we do +encourage developers to integrate Oríon into such systems as a component, and we will do +our best to help you if you're interested. + +Before continuing the overview, we assume that you have a basic understanding of machine learning +concepts. You may also want to install Oríon on your machine and configure it for a database before +continuing. Please refer to our :doc:`installation instructions ` and :doc:`database +setup `. + +We also made a presentation_ if you prefer going along with narrated content! + +.. _presentation: https://bluejeans.com/playback/s/4WUezzFCmb9StHzYgB0RjVbTUCKnRcptBvzBMP7t2UpLyKuAq7Emieo911BqEMnI + +Integration +=========== + +The core value of Oríon is to be non-intrusive. As such, we made it very easy to integrate it in +your machine learning environment. Suppose you're normally executing ``./script.py --lr=0.1``, with +``lr`` controlling your learning rate. + +The only modification you have to make is to call :py:func:`orion.client.report_objective` at the end +of your script, with the objective to minimize as the parameter.
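+
+To make this concrete, here is a minimal sketch of such a script. The argument parsing and the
+dummy objective below are illustrative stand-ins for your own training code:
+
+.. code-block:: python
+
+    import argparse
+
+    from orion.client import report_objective
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--lr', type=float, default=0.1)
+    args = parser.parse_args()
+
+    # Train and evaluate the model here; this dummy value stands in for
+    # the validation error your own code would compute.
+    valid_error = (args.lr - 0.01) ** 2
+
+    # Report the objective that Oríon will minimize.
+    report_objective(valid_error)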
+ +We made :doc:`a tutorial ` to guide you through those steps. + +Python API +---------- +Oríon can also be run from Python using our Python API, making it easy to integrate it in any +machine learning workflow or product. A detailed overview of this feature is available in +:doc:`/user/api` and :doc:`/user/storage`. + +Optimize +======== + +To actually optimize the hyper-parameters, we use Oríon ``hunt`` command to start the black-box +optimization process. + +For the previous example, we would run + +.. code-block:: console + + $ orion hunt -n script.py --lr~'loguniform(1e-5, 1.0)' + +This is going to start the optimization process using the default optimization algorithm and sample +the values for the ``lr`` hyper-parameter in a log uniform distribution between 0.00001 et 1.0. Each +trial will be stored in the database that you configured during the installation process (which can +be in-memory, a file, or a local or remote MongoDB instance). + +Additionally, the experiments are versioned -- think of it as a git for scientific experimentation +-- enabling you to keep track of all your trials with their parameters. This guarantees that you can +reproduce or trace back the steps in your work for free. + +You can fine-tune the distribution and algorithm with many options either with more arguments or by +using a configuration file. Learn more at :doc:`/user/api`. + +Scaling up +---------- + +Oríon is built to operate in parallel environments and is natively asynchronous; it runs efficiently +whether you execute it on your laptop or in a computing farm with thousands of processors. + +Moreover, adding more workers is as easy as executing the ``$ orion hunt`` command for each extra +worker needed. Indeed, Oríon doesn't uses a master / worker approach. The synchronization point is +the database: each worker will separately generate a new trial based on the state of the experiment +stored in the database. + +Make sure to visit :doc:`/user/parallel` to learn more about it and check out the tutorial to run +Oríon in :doc:`HPC environments `. + +Search Space +============ + +The search space is defined by priors for each hyperparameter to optimize. In the snippet earlier, +we used the *loguniform* prior. Oríon supports a vast range of search spaces, including almost all +the distributions from `scipy `_ out of the +box. You can define them either directly in the command line (as shown previously) or in a +configuration file: + +.. code-block:: yaml + + lr: 'orion~loguniform(1e-5, 1.0)' + +And then use it with: + +.. code-block:: console + + $ orion hunt -n script.py --config config.yaml + +Make sure to visit :doc:`/user/searchspace` for an exhaustive list of priors and their parameters. + +Algorithms +========== + +Oríon supports the latest established hyperparameter algorithms out of the box such as +:ref:`random-search`, :ref:`ASHA`, :ref:`tpe-algorithm`, and :ref:`hyperband-algorithm`; making it +easy to switch between them or create benchmarks. Each algorithm is fully configurable through the +configuration file. + +You can also bring your own algorithms to Oríon with its plugin system, where you can compare it +against other algorithms using the same framework and dataset. It also enables you to easily share +and publish your algorithm to other members of the community. + +Make sure to checkout `this presentation +`_ +for a quick overview of each algorithm and to visit :doc:`/user/algorithms` to learn about the +algorithms and get recommendations about their use cases. 
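+
+For instance, a configuration file selecting the random search algorithm could contain the snippet
+below; it reuses the configuration format documented in :doc:`/user/algorithms`:
+
+.. code-block:: yaml
+
+    experiment:
+      algorithms:
+        random:
+          seed: null
+
+Passing this file to ``orion hunt`` with ``--config`` applies the algorithm configuration to the
+experiment.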
+ +Monitoring +========== + +Oríon offers different ways to get information about your experiments and trials. + +* ``$ orion list`` gives an overview of all the experiments. +* ``$ orion status`` gives an overview of trials for experiments. +* ``$ orion info`` gives a detailed description of a given experiment such as priors and best + trials. + +Each command is described in detail in :doc:`/user/monitoring`. + +If you want a more fine-grained approach, you can always query the database directly or via Oríon's +python API. Check out :doc:`/user/storage` for more information. + +Next steps +========== + +It's worth taking a look at the :doc:`configuration system ` to learn more about how +to make the most out of Oríon and define precise behaviors for your algorithms and experiments. +Oríon uses a configuration-agnostic approach where you can use any configuration file format you're +comfortable with. + +Explore the :doc:`User Manual `; Oríon is simple from the outside but feature +rich! We also have a few tutorials available (e.g., :doc:`/tutorials/scikit-learn`, +:doc:`/tutorials/pytorch-mnist`). If you're a researcher or developer you might be interested in +:doc:`contributing ` or in developing your own :doc:`algorithm plugins +`! diff --git a/docs/src/plugins/algorithms.rst b/docs/src/plugins/algorithms.rst index 796703999..31824ef52 100644 --- a/docs/src/plugins/algorithms.rst +++ b/docs/src/plugins/algorithms.rst @@ -1,44 +1,247 @@ -********** -Algorithms -********** +********************* +Developing algorithms +********************* -The absolute bare **minimum interface** an optimization algorithm needs to have -is an ``observe`` method and a ``suggest`` method, so that: +The documentation here explains how to create new algorithm plugins based on the +`cookiecutter `_. - 1. An algorithm can ``observe`` the **results** associated with the evaluation - of a list of points in the parameter space. Using the history of evaluated - attempts, the algorithm can estimate better points to evaluate. - 2. An algorithm can ``suggest`` **new points** in the parameter space to try. +Usage +===== -An attribute given to all algorithms is an object defining the **parameter -search space**. It may be useful to the developer, or an algorithm may want -to advise it. +.. _GitHub: https://github.com/Epistimio/cookiecutter-orion.algo -Finally, it is suggested to distribute algorithm implementations -independently from the core Oríon package. In fact, the core package is able -to discover externally installed algorithms implementing Oríon's interface -**as plugins**! -.. contents:: Developer's Guide 104: Basic Algorithm +Install Python requirements to use the template: -Meet BaseAlgorithm -================== +.. code-block:: console -Basic algorithm's interface is defined in :doc:`/code/algo`. -The example we are going to follow is from the nested source repository used for -actual functional testing, gradient_descent_algo_. + $ python -m pip install cookiecutter>=1.5 versioneer>=0.18 jinja2 -Implement Basic Algorithms -========================== -Template `src/orion/algo/gradient_descent.py `_ -TODO +Create a new project directly from the template on `GitHub`_: -The Space Class -=============== +TODO -.. _gradient_descent_algo: https://github.com/epistimio/orion/tree/master/tests/functional/gradient_descent_algo -.. _gradient_descent_algo_code: https://github.com/epistimio/orion/blob/master/tests/functional/gradient_descent_algo/src/orion/algo/gradient_descent.py ..
code-block:: console + + $ cookiecutter gh:Epistimio/cookiecutter-orion.algo + plugin_name []: skopt + author_name []: Xavier Bouthillier + author_short [Author Name]: + author_email []: xavier.bouthillier@umontreal.ca + github_username []: bouthilx + copyright [2019, Author Name]: + short_description [TODO]: + synopsis [TODO]: + algo_name []: BayesianOptimizer + algo_module_name [bayesianoptimizer]: bayes ++-----------------------+--------------------------------------------+ +| Field | Description | ++-----------------------+--------------------------------------------+ +| ``plugin_name`` | Will be used for orion.algo.plugin_name | ++-----------------------+--------------------------------------------+ +| ``author_name`` | For metadata of python package | ++-----------------------+--------------------------------------------+ +| ``author_short`` | For metadata of python package | ++-----------------------+--------------------------------------------+ +| ``author_email`` | For metadata of python package | ++-----------------------+--------------------------------------------+ +| ``github_username`` | Username to build the url for installation | ++-----------------------+--------------------------------------------+ +| ``copyright`` | For the BSD-3 license | +| | (You can change the license) | ++-----------------------+--------------------------------------------+ +| ``short_description`` | For metadata of python package | ++-----------------------+--------------------------------------------+ +| ``synopsis`` | For documentation in algo module | ++-----------------------+--------------------------------------------+ +| ``algo_name`` | Name for the algorithm class | ++-----------------------+--------------------------------------------+ +| ``algo_module_name`` | Name of the algorithm module | ++-----------------------+--------------------------------------------+ +This will create the following package structure. + +.. code-block:: bash + + orion.algo.{plugin_name} + ├── README.rst + ├── setup.cfg + ├── setup.py + ├── MANIFEST.in + ├── LICENSE (BSD License) + ├── versioneer.py + ├── tox.ini + ├── dev-requirements.txt + ├── doc + │ ├── requirements.txt + │ └── src + │ ├── conf.py + │ └── index.rst + ├── tests + │ ├── requirements.txt + │ ├── integration_test.py + │ └── benchmark + │ ├── requirements.txt + │ ├── main.py + │ ├── rosenbrock.py + │ ├── {algoname}.yaml + │ ├── bayesopt.yaml + │ └── random_search.yaml + └── src + └── orion + └── algo + └── {plugin_name} + ├── {algoname}.py + ├── __init__.py + └── _version.py + +The important files to modify are ``src/orion/algo/{plugin_name}/{module_name}.py`` to implement the +algorithm and ``tests/benchmark/{algo_name}.yaml`` to fill the arguments required for the algorithm +you implement. + +``LICENSE`` + +Note that you are free to change the license; the copyright is in your name. + +``versioneer.py`` +``src/orion/algo/{plugin_name}/_version.py`` + +This serves to automatically version your algorithm; just ignore these files if you don't plan to +make releases. + +``tests`` + +These are the automatically generated tests. + +``tests/benchmark`` + +Automatically generated benchmark tests using the yaml files created in the same folder.
+ +``doc`` + +Automatically generated template for documentation. + +``tox.ini`` + +Tox file defining commands to run tests, build doc and publish code. + +Implementation +============== + +.. code-block:: python + + requires = 'real' + +Some algorithms require the search space to be real. You can specify this requirement by adding the +class attribute ``requires = 'real'`` to the class definition. In this case, the algorithm +wrapper in Oríon's core will convert the search space to a real one before passing it to your +algorithm. This way the user can define discrete or categorical dimensions while using algorithms +that require a real space. + +.. code-block:: python + + def __init__(self, space, seed=None): + +The initialization of the algorithm must pass space and seed to ``super().__init__``, but must also +pass any other argument that must be part of the configuration of the algorithm. Any argument passed +to ``super()`` will be assigned as an attribute to the algorithm and will be included in +``algo.configuration``, which is used to save the configuration of the algorithm in the database. + + +.. code-block:: python + + def seed_rng(self, seed=None): + +This method must seed the internal state of the algorithm so that it always samples the same +sequence of points. + +.. code-block:: python + + @property + def state_dict(self): + +The state dict is used to copy algorithms within the parallel strategy. All algorithms must provide +a state dict to ensure that they can be reset to a previous state. + +.. code-block:: python + + def set_state(self, state_dict): + +Stateful attributes of the algorithm are reset using the given ``state_dict``. Note that +``set_state`` must be compliant with ``state_dict`` and use +the same structure. + +.. code-block:: python + + def suggest(self, num=1): + +The method to suggest new trials. The argument ``num=1`` +requests the number of trials that the algorithm must sample. Note that it is possible to only +support ``num=1`` and raise ``ValueError`` otherwise. + +.. code-block:: python + + def observe(self, points, results): + +The method to observe results of suggested trials. Note that ``observe`` may be called several times +for the same points. Make sure to handle this properly within your algorithm if this is problematic. +Points are passed as a list of lists, each list representing the value of the params in the order +defined in ``self.space``.
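+
+Putting these methods together, here is a minimal random-search-like sketch. It assumes that
+``BaseAlgorithm`` is importable from ``orion.algo.base`` and that the space object provides a
+``sample(num, seed)`` method; treat it as an illustration rather than a reference implementation:
+
+.. code-block:: python
+
+    import numpy
+
+    from orion.algo.base import BaseAlgorithm
+
+
+    class SimpleRandom(BaseAlgorithm):
+        """Sample points uniformly at random from the search space."""
+
+        def __init__(self, space, seed=None):
+            # Arguments passed to super() end up in ``algo.configuration``.
+            super(SimpleRandom, self).__init__(space, seed=seed)
+            self.seed_rng(seed)
+
+        def seed_rng(self, seed=None):
+            # Seed the internal state so the same sequence of points is sampled.
+            self.rng = numpy.random.RandomState(seed)
+
+        @property
+        def state_dict(self):
+            # Expose every stateful attribute so the algorithm can be copied.
+            return {'rng_state': self.rng.get_state()}
+
+        def set_state(self, state_dict):
+            # Restore the state using the same structure as ``state_dict``.
+            self.seed_rng()
+            self.rng.set_state(state_dict['rng_state'])
+
+        def suggest(self, num=1):
+            # Draw ``num`` new points; ``space.sample`` is assumed here.
+            return self.space.sample(num, seed=self.rng.randint(0, 10000))
+
+        def observe(self, points, results):
+            # Random search ignores history; a model-based algorithm would
+            # update its internal model here (and guard against points
+            # observed more than once).
+            pass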
+Tests +===== + +To test the freshly built package, you must first install the requirements. From within the new +package, run + +.. code-block:: console + + $ pip install -r tests/requirements.txt + +You can then run the unit-tests with + +.. code-block:: console + + $ pytest tests/integration_test.py + +or using ``tox`` + +.. code-block:: console + + $ tox -e py36 + +Note that the pre-built algorithm is random search so that you can start from a fully working +environment and test your way through the modifications. + +There is also the option of running the toy-benchmark to compare the performance of your algorithm +with random search and Bayesian optimization. First install the requirements. + +.. code-block:: console + + $ pip install -r tests/benchmark/requirements.txt + +And then execute the benchmark + +.. code-block:: console + + $ pytest tests/benchmark/main.py + +or using ``tox`` + +.. code-block:: console + + $ tox -e benchmark + +Finally, official plugins must follow the same code quality standards as ``orion.core``. Therefore, +tests for ``flake8`` and ``pylint`` are included in the pre-built package. You can execute them +with + +.. code-block:: console + + $ tox -e flake8 + +and + +.. code-block:: console + + $ tox -e pylint diff --git a/docs/src/plugins/analysis.rst b/docs/src/plugins/analysis.rst deleted file mode 100644 index 035d79058..000000000 --- a/docs/src/plugins/analysis.rst +++ /dev/null @@ -1,3 +0,0 @@ -*************************** -Analysis and visualizations -*************************** diff --git a/docs/src/plugins/base.rst b/docs/src/plugins/base.rst index f81d6b8f7..9a6281a5f 100644 --- a/docs/src/plugins/base.rst +++ b/docs/src/plugins/base.rst @@ -1,16 +1,8 @@ -************************************ -How to setup and distribute a plugin -************************************ +******** +Overview +******** +Developing plugins for Oríon is easy. We offer a plugin template powered by cookiecutter_. The +instructions are on the `template's page `_. -Setup -===== - - -Distribution -============ - -Template `setup.py `_ -TODO - -.. _gradient_descent_algo_setup: https://github.com/epistimio/orion/blob/master/tests/functional/gradient_descent_algo/setup.py +.. _cookiecutter: https://github.com/cookiecutter/cookiecutter diff --git a/docs/src/plugins/database.rst b/docs/src/plugins/database.rst deleted file mode 100644 index e5908ac35..000000000 --- a/docs/src/plugins/database.rst +++ /dev/null @@ -1,3 +0,0 @@ -******** -Database -******** diff --git a/docs/src/install/plugins.rst b/docs/src/plugins/install.rst similarity index 86% rename from docs/src/install/plugins.rst rename to docs/src/plugins/install.rst index 588ab7021..772c9b3b2 100644 --- a/docs/src/install/plugins.rst +++ b/docs/src/plugins/install.rst @@ -1,20 +1,20 @@ -*********************** -Installation of plugins -*********************** +********** +Installing +********** Oríon is built to be easily extensible. Algorithms and database backends can be implemented in external repositories and installed as plugins. The installation process is very simple: you only need to install them as you would for Oríon's core (see :doc:`/install/core`). They will be automatically detected and available for Oríon at run-time. -Note that active development is currently focused on the core. Therefore only one algorithm plugin -is available for now. - -For more information about how to develop your own plugins, see section :doc:`/plugins/base`. +For more information about how to develop your own plugins, see section :doc:`/plugins/algorithms`. Algorithms ========== +Note that active development is currently focused on the core. Therefore only one algorithm plugin +is available for now. + Skopt algorithms ---------------- @@ -34,8 +34,9 @@ Next we define the file ``bayes.yaml`` as this .. code-block:: yaml - name: orion-with-bayes - algorithms: BayesianOptimizer + experiment: + name: orion-with-bayes + algorithms: BayesianOptimizer Then call ``orion hunt`` with the configuration file. @@ -44,9 +45,3 @@ Then call ``orion hunt`` with the configuration file. $ orion hunt --config bayes.yaml ./script.sh --lr~'loguniform(1e-5, 1.0)' Now, we have a Bayesian optimizer sampling learning-rate values to optimize the error rate.
- - -Database backend -================ - -TODO diff --git a/docs/src/reference/.gitkeep b/docs/src/reference/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/src/examples/cluster.rst b/docs/src/tutorials/cluster.rst similarity index 93% rename from docs/src/examples/cluster.rst rename to docs/src/tutorials/cluster.rst index d6a658d38..50eda5a46 100644 --- a/docs/src/examples/cluster.rst +++ b/docs/src/tutorials/cluster.rst @@ -2,7 +2,7 @@ Running on HPC ************** -This guide is based on the example described in :doc:`/user/pytorch`. +This guide is based on the example described in :doc:`/tutorials/pytorch-mnist`. Parallel optimization using arrays ================================== @@ -19,7 +19,7 @@ together. A minimal Slurm script to launch 10 workers would thus only require th #SBATCH --array=1-10 - orion hunt -n parallel-exp ./main.py --lr~'loguniform(1e-5, 1.0)' + orion hunt -n parallel-exp python main.py --lr~'loguniform(1e-5, 1.0)' All workers are optimizing the experiment ``parallel-exp`` in parallel, each holding a copy of the optimization algorithm. Adding Slurm options to execute the mnist example with proper resources gives the following @@ -36,7 +36,7 @@ gives the following #SBATCH --mem=10GB #SBATCH --time=2:59:00 - orion hunt -n parallel-exp --worker-trials 1 ./main.py --lr~'loguniform(1e-5, 1.0)' + orion hunt -n parallel-exp --worker-trials 1 python main.py --lr~'loguniform(1e-5, 1.0)' For now, Oríon does not provide detection of lost trials if a worker gets killed due to a timeout. Such a trial would be indefinitely marked as ``pending`` in the DB and thus could not be @@ -57,7 +57,7 @@ character ``%`` (ex: ``#SBATCH --array=1-100%10``). #SBATCH --mem=10GB #SBATCH --time=2:59:00 - orion hunt -n parallel-exp --worker-trials 1 ./main.py --lr~'loguniform(1e-5, 1.0)' + orion hunt -n parallel-exp --worker-trials 1 python main.py --lr~'loguniform(1e-5, 1.0)' SSH tunnels @@ -135,7 +135,7 @@ These lines can then be added to the script to submit workers in parallel. ssh -o StrictHostKeyChecking=no -L $ORION_DB_PORT::27017 -n -N -f - orion hunt -n parallel-exp --worker-trials 1 ./main.py --lr~'loguniform(1e-5, 1.0)' + orion hunt -n parallel-exp --worker-trials 1 python main.py --lr~'loguniform(1e-5, 1.0)' Notes for MongoDB diff --git a/docs/src/user/pytorch.rst b/docs/src/tutorials/pytorch-mnist.rst similarity index 82% rename from docs/src/user/pytorch.rst rename to docs/src/tutorials/pytorch-mnist.rst index be8e944f3..bffad0b38 100644 --- a/docs/src/user/pytorch.rst +++ b/docs/src/tutorials/pytorch-mnist.rst @@ -1,6 +1,6 @@ -************** -Simple example -************** +************* +PyTorch MNIST +************* This is a simple tutorial on running hyperparameter search with Oríon on Pytorch's MNIST example PyTorch `examples repository`_: .. code-block:: bash $ pip3 install torch torchvision - $ git clone git@github.com:pytorch/examples.git + $ git clone https://github.com/pytorch/examples.git .. _examples repository: https://github.com/pytorch/examples @@ -25,11 +25,11 @@ PyTorch `examples repository`_: Adapting the code for Oríon =========================== -To use Oríon with any code we need to do three things +To use Oríon with any code we need to do two things: -1. make the ``main.py`` file a python executable -2. import the ``orion.client.report_results`` helper function -3. call `report_results` on the final objective output to be minimized (e.g. final test error rate) +1. import the ``orion.client.report_objective`` helper function +2.
call `report_objective` on the final objective output to be minimized + (e.g. final test error rate) After cloning the pytorch examples repository, cd to the mnist folder: .. code-block:: bash $ cd examples/mnist -1. In your favourite editor add a shebang line ``#!/usr/bin/env python`` to -the ``main.py`` and make it executable, for example: - -.. code-block:: bash - - $ sed -i '1s/^/#!/usr/bin/env python/' main.py - $ chmod +x main.py - -2. At the top of the file, below the imports, add one line of import the helper function -``orion.client.report_results()``: +1. At the top of the file, below the imports, add one line of import for the helper function +``orion.client.report_objective()``: .. code-block:: python - from orion.client import report_results + from orion.client import report_objective -3. We need the test error rate so we're going to add a line to the function ``test()`` to return it +2. We need the test error rate so we're going to add a line to the function ``test()`` to return it. .. code-block:: python return 1 - (correct / len(test_loader.dataset)) -Finally, we get back this test error rate and call ``report_results`` to -return the final objective value to Oríon. Note that ``report_results`` is meant to +Finally, we get back this test error rate and call ``report_objective`` to +return the final objective value to Oríon. Note that ``report_objective`` is meant to be called only once because Oríon only looks at one ``'objective'`` value per run. .. code-block:: python test_error_rate = test(args, model, device, test_loader) - report_results([dict( - name='test_error_rate', - type='objective', - value=test_error_rate)]) + report_objective(test_error_rate) + Execution ========= @@ -79,7 +69,7 @@ rather simple. Normally you would call the script the following way. .. code-block:: bash - $ ./main.py --lr 0.01 + $ python main.py --lr 0.01 To use it with Oríon, you simply need to prepend the call with ``orion hunt -n `` and specify the hyper-parameter prior distributions. .. code-block:: bash - $ orion hunt -n orion-tutorial ./main.py --lr~'loguniform(1e-5, 1.0)' + $ orion hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' -This commandline call will sequentially execute ``./main.py --lr=`` with random +This commandline call will sequentially execute ``python main.py --lr=`` with random values sampled from the distribution ``loguniform(1e-5, 1.0)``. We support all distributions from scipy.stats_, plus ``choices()`` for categorical hyper-parameters (similar to numpy's `choice function`_). @@ -115,7 +105,7 @@ You can also register experiments without executing them. .. code-block:: bash - $ orion init_only -n orion-tutorial ./main.py --lr~'loguniform(1e-5, 1.0)' + $ orion init_only -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' Results ======= @@ -156,8 +146,12 @@ validation set. Oríon will always **minimize** the objective so make sure you never try to optimize something like the accuracy of the model unless you are looking for very very bad models. -You can also ``report_results`` of types ``'gradient'`` and ``'constraint'`` for -algorithms which require those parameters as well. +You can also report results of types ``'gradient'`` and ``'constraint'`` for +algorithms which require those parameters as well, or ``'statistic'`` for metrics +to be saved with the trial. See +:py:func:`report_results() ` +for more details.
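+
+As a sketch, reporting several result types at once could look like the following; the variable
+values here are illustrative placeholders:
+
+.. code-block:: python
+
+    from orion.client import report_results
+
+    test_error_rate = 0.08  # stand-ins for values computed by your script
+    memory_usage = 1.2
+
+    report_results([
+        # The single objective value that Oríon minimizes.
+        dict(name='test_error_rate', type='objective', value=test_error_rate),
+        # A constraint for algorithms which support one.
+        dict(name='memory_usage', type='constraint', value=memory_usage),
+        # A metric simply saved with the trial.
+        dict(name='test_accuracy', type='statistic', value=1 - test_error_rate),
+    ])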
+ + Debugging ========= @@ -168,7 +162,7 @@ don't use ``--debug`` you will likely quickly fill your database with broken exp .. code-block:: bash - $ orion --debug hunt -n orion-tutorial ./main.py --lr~'loguniform(1e-5, 1.0)' + $ orion --debug hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' Hunting Options --------------- @@ -242,4 +236,3 @@ useful if many workers are executed in parallel and the algorithm has a strategy non-independent trials simultaneously. Otherwise, it is better to leave ``pool_size`` to its default value 1. Note that this option is not useful unless you know the algorithm has a strategy to produce multiple trials simultaneously. If you have any doubt, leave it to 1. :) - diff --git a/docs/src/examples/pytorch_a2c_ppo.rst b/docs/src/tutorials/pytorch_a2c_ppo.rst similarity index 88% rename from docs/src/examples/pytorch_a2c_ppo.rst rename to docs/src/tutorials/pytorch_a2c_ppo.rst index f0899739f..20c174034 100644 --- a/docs/src/examples/pytorch_a2c_ppo.rst +++ b/docs/src/tutorials/pytorch_a2c_ppo.rst @@ -1,5 +1,5 @@ ********************************************* -Example with ikostrikov/pytorch-a2c-ppo-acktr +PyTorch A2C PPO ACKTR ********************************************* .. note :: @@ -44,17 +44,7 @@ we add: .. code-block:: python - #!/usr/bin/env python - from orion.client import report_results - - -and then we run - -.. code-block:: bash - - chmod +x main.py - -To make it executable. + from orion.client import report_objective Then, we ensure that we evaluate on a separate set of held-out random seeds for the environment (which should be different from the test set and training seed). @@ -86,10 +76,7 @@ algorithm: args.cuda, eval_env_seeds) - report_results([dict( - name='validation_return', - type='objective', - value=np.mean(validation_returns))]) + report_objective(name='validation_return', objective=np.mean(validation_returns)) Now we're ready to run Oríon's hyperparameter optimization! How to search for hyperparameters ================================= .. code-block:: bash orion -v hunt -n ppo_hopper \ - ./main.py --env-name "Hopper-v2" --algo ppo --use-gae --vis-interval 1 \ + python main.py --env-name "Hopper-v2" --algo ppo --use-gae --vis-interval 1 \ --log-interval 1 --num-stack 1 --num-steps 2048 --num-processes 1 \ --lr~'loguniform(1e-5, 1.0)' --entropy-coef 0 --value-loss-coef 1 \ --ppo-epoch 10 --num-mini-batch 32 --gamma~'uniform(.95, .9995)' --tau 0.95 \ diff --git a/docs/src/examples/pytorch_cifar.rst b/docs/src/tutorials/pytorch_cifar.rst similarity index 75% rename from docs/src/examples/pytorch_cifar.rst rename to docs/src/tutorials/pytorch_cifar.rst index 3e6b7b29f..c958561dd 100644 --- a/docs/src/examples/pytorch_cifar.rst +++ b/docs/src/tutorials/pytorch_cifar.rst @@ -1,6 +1,6 @@ -************************** -Example with pytorch-cifar -************************** +*************** +PyTorch CIFAR10 +*************** .. note :: @@ -13,15 +13,14 @@ Example with pytorch-cifar using the option `--debug`, but note that all data gathered during an execution will be lost at the end of it. -pip3 install torch torchvision - -git clone git@github.com:kuangliu/pytorch-cifar.git - -cd pytorch-cifar +Set up ..
code-block:: bash - sed -i '1 i\#!/usr/bin/env python' main.py + pip3 install torch torchvision + + git clone https://github.com/kuangliu/pytorch-cifar.git + cd pytorch-cifar Add to the last line of test() @@ -35,10 +34,7 @@ Last line of the main() function test_error_rate = test(epoch) - report_results([dict( - name='test_error_rate', - type='objective', - value=test_error_rate)]) + report_objective(test_error_rate, name='test_error_rate') .. code-block:: bash diff --git a/docs/src/tutorials/scikit-learn.rst b/docs/src/tutorials/scikit-learn.rst new file mode 100644 index 000000000..5a33967cb --- /dev/null +++ b/docs/src/tutorials/scikit-learn.rst @@ -0,0 +1,110 @@ +******************** +Scikit-learn +******************** +.. Might also think of moving this file to examples/ so we have an example self-contained in the + repository. We invoke this file from index.rst + +In this tutorial, we're going to demonstrate how Oríon can be integrated with a minimal model using +`scikit-learn `_ on the `iris dataset +`_. The files mentioned in this tutorial are available +at `examples/scikitlearn-iris/ +`_ in Oríon's repository. + +The requirements are listed in requirements.txt. You can quickly install them using ``$ pip +install -r requirements.txt``. If you haven't installed Oríon previously, make sure to +:doc:`configure it properly ` before going further. + +Sample script +--------------- + +.. literalinclude:: /../../examples/scikitlearn-iris/main.py + :language: python + :lines: 1-2, 5-9, 13-30 + +This very basic script takes one positional argument for the hyper-parameter *epsilon*, +which controls the loss in the script. + +The script is divided into five parts: + +#. Parsing of the script arguments +#. Loading and splitting the dataset +#. Training a classifier using the researcher-defined *epsilon* +#. Evaluating the classifier using the testing set +#. Reporting the performance of the model, i.e., the accuracy. + +.. note:: + The workflow presented in the script is simplified on purpose compared to real ones. The + objective of this example is to illustrate the basic steps involved in using Oríon. + +To find a good *epsilon*, a user would empirically run ``$ python main.py `` multiple +times, choosing a new value for *epsilon* manually. + +This ad-hoc hyper-parameter optimization is unreliable, slow, and requires a lot of work from the +user. Oríon solves this problem by providing established hyper-parameter optimization +algorithms without disrupting the workflow of the user. Integrating it only requires minimal +adjustments to your current workflow as we'll demonstrate in the next section. + +Enter Oríon +----------- +Integrating Oríon into your workflow requires only two non-invasive changes: + 1. Define an objective to optimize. + 2. Specify the hyper-parameter space. + +For the former, this step takes place in the script training the model. The latter can either be +specified in a configuration file or directly while calling the script with Oríon. +For the purpose of the example, we'll configure the hyper-parameter space directly as a +command-line argument. + +Updating the script +^^^^^^^^^^^^^^^^^^^ +We only need to make one small change to the script: we report to Oríon the objective that we +want to **minimize** at the end of the script using :py:func:`orion.client.report_objective`: + +.. literalinclude:: /../../examples/scikitlearn-iris/main.py + :language: python + :lines: 32- + +In our example, we measure the accuracy of the model to qualify its performance.
To get the best +accuracy possible, we minimize the difference between 1 and the accuracy, which pushes the accuracy +as close to 1 as possible. Otherwise, we would be minimizing the accuracy itself, which would yield +a poor model. + +:py:func:`orion.client.report_objective` can be imported using: + +.. code-block:: python + + from orion.client import report_objective + +Updating the script call +^^^^^^^^^^^^^^^^^^^^^^^^ +The last missing piece in automating the hyper-parameter optimization of our example model is to +supply Oríon with the values to use for *epsilon*. + +We specify the search space in the command line using ``orion~loguniform(1e-5, 1.0)`` +as the argument for *epsilon*. This argument will tell Oríon to use a log uniform distribution +between ``1e-5`` and ``1`` for the values of *epsilon*. + +Putting everything together, we need to call ``main.py`` with Oríon. The syntax is the +following: ``$ orion hunt python main.py 'orion~loguniform(1e-5, 1.0)'``. Before executing it on +your terminal, you have to specify the name of the experiment using the ``-n`` option. It is also a +good idea to specify a stopping condition using ``--max-trials``, otherwise the optimization will +not stop unless you interrupt it with :kbd:`ctrl-c`: + +.. code-block:: bash + + $ orion hunt -n scikit-iris-tutorial --max-trials 50 python main.py 'orion~loguniform(1e-5, 1.0)' + +.. warning:: + Make sure you installed the dependencies for the script before running it using ``pip install + -r requirements.txt``. + +Viewing the results +------------------- +Once the optimization has reached its stopping condition, you can query Oríon to give you the results +of the optimization with the sub-command ``$ orion info``: + +.. code-block:: bash + + $ orion info -n scikit-iris-tutorial + +You can also query the results from the database using :ref:`Oríon's python API +`. Check it out to learn more and see examples. diff --git a/docs/src/user/algorithms.rst b/docs/src/user/algorithms.rst index 3c22f8e2e..aa801dd07 100644 --- a/docs/src/user/algorithms.rst +++ b/docs/src/user/algorithms.rst @@ -1,6 +1,8 @@ -**************** -Setup Algorithms -**************** +.. _Setup Algorithms: + +********** +Algorithms +********** .. contents:: :depth: 2 @@ -16,9 +18,10 @@ In a Oríon configuration YAML, define: .. code-block:: yaml - algorithms: - gradient_descent: - learning_rate: 0.1 + experiment: + algorithms: + gradient_descent: + learning_rate: 0.1 In this particular example, the name of the algorithm extension class to be imported and instantiated is ``Gradient_Descent``, so the lower-case identifier @@ -47,15 +50,79 @@ Configuration .. code-block:: yaml - algorithms: - random: - seed: null + experiment: + algorithms: + random: + seed: null ``seed`` Seed for the random number generator used to sample new trials. Default is ``None``. +.. _hyperband-algorithm: + +Hyperband +--------- + +`Hyperband`_ extends the `SuccessiveHalving`_ algorithm by providing a way to exploit a +fixed budget with different numbers of configurations for the ``SuccessiveHalving`` algorithm to +evaluate. Each run of ``SuccessiveHalving`` will be defined as a ``bracket`` in Hyperband. +Hyperband requires two inputs: (1) ``R``, the maximum amount of resource that can be allocated +to a single configuration, and (2) ``eta``, an input that controls the proportion of +configurations discarded in each round of SuccessiveHalving.
+ +To use Hyperband in Oríon, you must specify one parameter with ``fidelity(low, high, base)`` +as the prior; ``low`` will be ignored, ``high`` will be taken as the maximum resource ``R``, +and ``base`` will be taken as the reduction factor ``eta``. + +The number of epochs can usually be used as the resource, but the algorithm is generic and can be +applied to any multi-fidelity setting. That is, you can use training time, specifying the +fidelity with ``--epochs~fidelity(low=1, high=81, base=3)`` +(assuming your script takes this argument on the commandline), +but you could also use another fidelity +such as dataset size, with ``--dataset-size~fidelity(low=500, high=50000)`` +(assuming your script takes this argument and adapts the dataset size accordingly). + + +.. _SuccessiveHalving: https://arxiv.org/abs/1502.07943 + +.. note:: + + The current implementation does not support more than one fidelity dimension. + +Configuration +~~~~~~~~~~~~~ + +.. code-block:: yaml + + experiment: + algorithms: + hyperband: + seed: null + repetitions: 1 + + strategy: StubParallelStrategy + + +.. note:: + + Notice the additional ``strategy`` in configuration which is not mandatory for most other + algorithms. See :ref:`StubParallelStrategy` for more information. + + +``seed`` + +Seed for the random number generator used to sample new trials. Default is ``None``. + +``repetitions`` + +Number of executions for Hyperband. A single execution of Hyperband takes a finite +budget of ``(log(R)/log(eta) + 1) * (log(R)/log(eta) + 1) * R``, and ``repetitions`` allows you +to run multiple executions of Hyperband. Default is ``numpy.inf``, which means Hyperband runs +until no new trials can be suggested. + + .. _ASHA: ASHA @@ -96,19 +163,19 @@ Configuration .. code-block:: yaml - algorithms: - asha: - seed: null - num_rungs: null - num_brackets: 1 + experiment: + algorithms: + asha: + seed: null + num_rungs: null + num_brackets: 1 - producer: - strategy: StubParallelStrategy + strategy: StubParallelStrategy .. note:: - Notice the additional ``producer.strategy`` in configuration which is not mandatory for other + Notice the additional ``strategy`` in configuration which is not mandatory for most other algorithms. See :ref:`StubParallelStrategy` for more information. @@ -130,6 +197,83 @@ converging trials that do not lead to best results at convergence (stragglers). To overcome this, you can increase the number of brackets, which increases the amount of resources required for optimisation but decreases the bias towards stragglers. Default is 1. + +.. _tpe-algorithm: + +TPE +--------- + +The `Tree-structured Parzen Estimator`_ (TPE) algorithm is a Sequential Model-Based +Global Optimization (SMBO) algorithm, which builds models to propose new points based +on previously observed trials. + +Instead of modeling p(y|x) like other SMBO algorithms, TPE models p(x|y) and p(y), +and p(x|y) is modeled by transforming that generative process, replacing the distributions of +the configuration prior with non-parametric densities. + +TPE defines p(x|y) using two such densities, l(x) and g(x), where l(x) is the distribution of +good points and g(x) is the distribution of bad points. The points observed so far are split into +good and bad with a parameter ``gamma``, which defines the ratio of good points. New candidate +points are sampled from l(x), and an Expected Improvement (EI) optimization scheme is used to find +the most promising point among the candidates. + + +..
_Tree-structured Parzen Estimator: + https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf + +.. note:: + + The current implementation only supports uniform, loguniform, uniform discrete, and choices as + priors. For the choices prior, the probabilities, if any are given, will be ignored. + +Configuration +~~~~~~~~~~~~~ + +.. code-block:: yaml + + experiment: + algorithms: + tpe: + seed: null + n_initial_points: 20 + n_ei_candidates: 25 + gamma: 0.25 + equal_weight: False + prior_weight: 1.0 + full_weight_num: 25 + + +``seed`` + +Seed to sample initial points and candidate points. Default is ``None``. + +``n_initial_points`` + +Number of initial points randomly sampled. Default is ``20``. + +``n_ei_candidates`` + +Number of candidate points sampled for the EI computation. Default is ``24``. + +``gamma`` + +Ratio to split the observed trials into good and bad distributions. Default is ``0.25``. + +``equal_weight`` + +If ``True``, equal weights are assigned to observed points. Default is ``False``. + +``prior_weight`` + +The weight given to the prior point of the input space. Default is ``1.0``. + +``full_weight_num`` + +The number of the most recent trials which get the full weight, while the others are +weighted with a linear ramp from 0 to 1.0. It only takes effect if ``equal_weight`` +is ``False``. Default is ``25``. + + Algorithm Plugins ================= @@ -156,15 +300,16 @@ Configuration .. code-block:: yaml - algorithms: - BayesianOptimizer: - seed: null - n_initial_points: 10 - acq_func: gp_hedge - alpha: 1e-10 - n_restarts_optimizer: 0 - noise: "gaussian" - normalize_y: False + experiment: + algorithms: + BayesianOptimizer: + seed: null + n_initial_points: 10 + acq_func: gp_hedge + alpha: 1.0e-10 + n_restarts_optimizer: 0 + noise: "gaussian" + normalize_y: False ``seed`` @@ -256,10 +401,10 @@ The value of the objective is customizable with ``stub_value``. .. code-block:: yaml - producer: - strategy: - StubParallelStrategy: - stub_value: 'custom value' + experiment: + strategy: + StubParallelStrategy: + stub_value: 'custom value' .. _MaxParallelStrategy: @@ -274,10 +419,10 @@ is ``float('inf')`` by default. .. code-block:: yaml - producer: - strategy: - MaxParallelStrategy: - default_result: 10000 + experiment: + strategy: + MaxParallelStrategy: + default_result: 10000 MeanParallelStrategy @@ -291,7 +436,7 @@ is ``float('inf')`` by default. .. code-block:: yaml - producer: - strategy: - MeanParallelStrategy: - default_result: 0.5 + experiment: + strategy: + MeanParallelStrategy: + default_result: 0.5 diff --git a/docs/src/user/api.rst b/docs/src/user/api.rst new file mode 100644 index 000000000..5c433dc15 --- /dev/null +++ b/docs/src/user/api.rst @@ -0,0 +1,166 @@ +******** +Optimize +******** + +.. contents:: + :depth: 2 + :local: + + +There are two ways of using Oríon for optimization. One is using the commandline interface which +conveniently turns a simple script into a hyper-parameter optimization process at the level of the +command line. +The other is using the python interface which gives total control +over the pipeline of optimization. + +Commandline API +=============== + +Suppose you normally execute your script with the following command + +.. code-block:: bash + + $ python main.py --lr 0.1 + +Using the commandline API you can turn your script into a hyper-parameter optimization process by +wrapping it with Oríon. + +..
code-block:: bash + + $ orion hunt -n exp-name python main.py --lr~'loguniform(1e-5, 1.0)' + +An experiment called ``exp-name`` will now be created and your script will be called with +the argument ``--lr`` assigned to values sampled by the optimization algorithm. + +Configuration of the algorithm can be done inside a yaml file passed to ``--config`` as described in +:ref:`Setup Algorithms`. + +To return the results to Oríon, you must add a call to +:py:func:`orion.client.report_objective(value) ` +in your script at the end of the execution. + +See :py:mod:`orion.client.cli` for more information on all helper functions available. + + +Python APIs +=========== + +The python API comes in three versions + +:ref:`sequential_api`: + A simple method for local sequential optimization. +:ref:`service_api`: + A simple method to get new parameters to try and report results in a distributed manner. +:ref:`framework_api`: + Total control over the hyper-parameter optimization pipeline. + + +.. _sequential_api: + +Sequential API +-------------- + +Using the helper :py:func:`orion.client.workon`, +you can optimize a function with a single line of code. + +.. code-block:: python + + from orion.client import workon + + + def foo(x): + return [dict( + name='dummy_objective', + type='objective', + value=1)] + + + experiment = workon(foo, space=dict(x='uniform(-50,50)')) + + +The experiment object returned can be used to fetch the database of trials +and analyze the optimization process. Note that the storage for `workon` is +in-memory and requires no setup. This means, however, that :py:func:`orion.client.workon` +cannot be used for parallel optimization. + +.. _service_api: + +Service API +----------- + +Experiments are created using the helper function +:py:func:`orion.client.create_experiment`. +You can then sample new trials with +:py:meth:`experiment.suggest() `. +The parameters of the trials are provided as a dictionary with +:py:meth:`trial.params `. +Once the trial is completed, results can be reported to the experiment with +:py:meth:`experiment.observe() `. +Note that this should be the final result of the trial. When ``observe`` is called, the trial +reservation is released and its status is set to completed. Observing the same trial twice will +raise a ``RuntimeError`` because the trial is not reserved anymore. + +.. code-block:: python + + from orion.client import create_experiment + + experiment = create_experiment( + name='foo', + space=dict(x='uniform(-50,50)')) + + trial = experiment.suggest() + + # Do something using trial.params['x'] + + results = [dict( + name='dummy_objective', + type='objective', + value=dummy_objective)] + + experiment.observe(trial, results) + + +The storage used by the experiment can be specified as an argument to +:py:func:`create_experiment(storage={}) ` +or in a global configuration file as described in :ref:`install_database`. + +To distribute the hyper-parameter optimization across many workers, simply execute your script in +parallel where you want to execute your trials. The method +:py:meth:`experiment.suggest() ` +will take care of synchronizing the local algorithm with all remote instances, making it possible +to distribute the optimization without setting up a master process. + +See :py:class:`ExperimentClient ` +for more information on the experiment client object. + +.. warning:: + + Code version detection is not currently supported.
This means that creating experiments using + different code versions will not lead to a version increment as it would with the commandline + API. + + +.. _framework_api: + + +Framework API +------------- + +.. warning:: + + This API is not implemented yet. It should be included in v0.2.0. + +.. code-block:: python + + from orion.client import create_space + from orion.client import create_algo + + space = create_space(x='uniform(-50,50)') + + algo = create_algo(space, type='ASHA')  # plus algorithm-specific configuration + + params = algo.suggest() + + results = 'some_results...' + + algo.observe(params, results) diff --git a/docs/src/user/cli/info.rst b/docs/src/user/cli/info.rst index ae8685937..7ba08a107 100644 --- a/docs/src/user/cli/info.rst +++ b/docs/src/user/cli/info.rst @@ -18,7 +18,7 @@ Here is an example of all the sections provided by the command. Commandline =========== - --lr~loguniform(1e-5, 1.0) + python main.py --lr~loguniform(1e-5, 1.0) Config diff --git a/docs/src/user/config.rst b/docs/src/user/config.rst new file mode 100644 index 000000000..db1782e73 --- /dev/null +++ b/docs/src/user/config.rst @@ -0,0 +1,618 @@ +.. _configuration: + +********************** +Advanced Configuration +********************** + +Oríon supports different levels of configuration to provide flexibility. +The configuration levels are hierarchical with higher levels having precedence over the +lower ones. The levels are the following: + +1. Default configuration +2. Global configuration +3. Environment variables +4. Experiment configuration from database* +5. Local configuration +6. Commandline arguments + +Larger numbers have precedence over +the lower ones. We describe each +type of configuration further below. + +**1. Default Configuration** + +Default values are defined in the core code of Oríon `here `_. + +**2. Global Configuration** + +Defined in a yaml file in global configuration folders of Oríon. +On Linux systems, this is typically at ``$HOME/.config/orion.core``. You can get a list +of these folders on your system with the following command. + +.. code-block:: bash + + $ python -c 'import orion.core; print("\n".join(orion.core.DEF_CONFIG_FILES_PATHS))' + /usr/share/awesome/orion.core/orion_config.yaml.example + /etc/xdg/xdg-awesome/orion.core/orion_config.yaml + /home/user/.config/orion.core/orion_config.yaml + +This list is an example and will likely differ on your end. + +**3. Environment Variables** + +Most options are configurable through environment variables as well. +These are documented below in the **Env var** fields whenever available; +otherwise the field is left empty. + +**4. Experiment Configuration from Database** + +\*Experiment configuration from database is not configurable by the user per se, +but it is represented here because whenever something differs between the experiment's +configuration and the levels below, the experiment's configuration will have precedence. +For instance if ``experiment.max_trials`` is set to 10 in global configuration +and an experiment with ``max_trials`` set to 100 is resumed, then ``max_trials`` +will be 100, not 10. +However, if ``experiment.max_trials`` is set to 10 in the local configuration file +(the file passed with ``--config`` to the hunt command) or with the commandline argument +``--exp-max-trials``, then the experiment's ``max_trials`` will be updated to 10. +This makes it possible to resume experiments without specifying the whole configuration, +because the experiment configuration from the database is reused, but it also makes it possible +to create a new experiment based on a previous one by simply specifying what to change.
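+
+For example, assuming an experiment named ``foo`` already exists in the database, it could be
+resumed with a different maximum number of trials using only the commandline argument mentioned
+above:
+
+.. code-block:: bash
+
+    $ orion hunt -n foo --exp-max-trials 10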
+ +**5. Local Configuration** + +Defined in a yaml file that is passed on the commandline. + +.. code-block:: bash + + orion [COMMAND] --config local.yaml + +Not to be confused with a configuration file that may be passed to the user's script. + +.. code-block:: bash + + orion hunt --config local.yaml ./myscript.sh --config script.yaml + +Here ``script.yaml`` is the user's script configuration (passed to ``./myscript.sh``), +and ``local.yaml`` is the local configuration file (passed to ``hunt``). + +The configuration passed through the python API is also considered local configuration. + +**6. Commandline Arguments** + +All arguments of ``orion hunt``. + +Full Example of Global Configuration +------------------------------------ + +.. code-block:: yaml + + database: + host: localhost + name: orion + port: 27017 + type: mongodb + + experiment: + algorithms: + random: + seed: None + max_broken: 3 + max_trials: 1000000000 + pool_size: 1 + strategy: + MaxParallelStrategy + worker_trials: 1000000000 + working_dir: + + worker: + heartbeat: 120 + interrupt_signal_code: 130 + max_broken: 10 + max_idle_time: 60 + max_trials: 1000000000 + user_script_config: config + + evc: + algorithm_change: False + auto_resolution: True + cli_change_type: break + code_change_type: break + config_change_type: break + ignore_code_changes: False + manual_resolution: False + non_monitored_arguments: [] + + +---- + + +.. _config_database: + +Database +-------- + +.. code-block:: yaml + + database: + host: localhost + name: orion + port: 27017 + type: mongodb + + +.. _config_database_name: + +name +~~~~ + +:Type: str +:Default: orion +:Env var: ORION_DB_NAME +:Description: + Name of the database. + + + +.. _config_database_type: + +type +~~~~ + +:Type: str +:Default: MongoDB +:Env var: ORION_DB_TYPE +:Description: + Type of database. Builtin backends are ``mongodb``, ``pickleddb`` and ``ephemeraldb``. + + + +.. _config_database_host: + +host +~~~~ + +:Type: str +:Default: 127.0.1.1 +:Env var: ORION_DB_ADDRESS +:Description: + URI for ``mongodb``, or file path for ``pickleddb``. + + + +.. _config_database_port: + +port +~~~~ + +:Type: int +:Default: 27017 +:Env var: ORION_DB_PORT +:Description: + Port address for ``mongodb``. + + + +---- + + +.. _config_experiment: + +Experiment +---------- + +.. code-block:: yaml + + experiment: + algorithms: + random: + seed: None + max_broken: 3 + max_trials: 1000000000 + pool_size: 1 + strategy: + MaxParallelStrategy + worker_trials: 1000000000 + working_dir: + + + +.. _config_experiment_name: + +name +~~~~ + +.. note:: This option is only supported in local configuration. + +:Type: str +:Default: +:Env var: +:Description: + Name of the experiment. + + +.. _config_experiment_version: + +version +~~~~~~~ + +.. note:: This option is only supported in local configuration. + + +:Type: int +:Default: None +:Env var: +:Description: + Version of the experiment. If not defined, the latest experiment for the given + name will be selected. Version is automatically incremented if there is any + modification detected in the experiment's configuration + (search space, algorithm configuration, code version, ...) + + +user +~~~~ + +.. note:: This option is only supported in local configuration.
+ +:Type: str +:Default: $USERNAME +:Env var: +:Description: + Name of the user to associate with the experiment. + + +.. _config_experiment_max_trials: + +max_trials +~~~~~~~~~~ + +:Type: int +:Default: 1000000000 +:Env var: ORION_EXP_MAX_TRIALS +:Description: + Number of trials to be completed for the experiment. This value will be saved within the + experiment configuration and reused across all workers to determine the experiment's completion. + + + +.. _config_experiment_worker_trials: + +worker_trials +~~~~~~~~~~~~~ + +.. warning:: + + **DEPRECATED.** This argument will be removed in v0.3. + See :ref:`worker: max_trials ` instead. + +:Type: int +:Default: 1000000000 +:Env var: +:Description: + (DEPRECATED) This argument will be removed in v0.3. + See :ref:`worker: max_trials ` instead. + + + +.. _config_experiment_max_broken: + +max_broken +~~~~~~~~~~ + +:Type: int +:Default: 3 +:Env var: ORION_EXP_MAX_BROKEN +:Description: + Maximum number of broken trials before the experiment stops. + + + +.. _config_experiment_working_dir: + +working_dir +~~~~~~~~~~~ + +:Type: str +:Default: +:Env var: ORION_WORKING_DIR +:Description: + Set the working directory for running the experiment. + + + +.. _config_experiment_pool_size: + +pool_size +~~~~~~~~~ + +.. warning:: + + **DEPRECATED.** This argument will be removed in v0.3. + +:Type: int +:Default: 1 +:Env var: +:Description: + (DEPRECATED) This argument will be removed in v0.3. + + +.. _config_experiment_algorithms: + +algorithms +~~~~~~~~~~ + +:Type: dict +:Default: random +:Env var: +:Description: + Algorithm configuration for the experiment. + + + +.. _config_experiment_strategy: + +strategy +~~~~~~~~ + +:Type: dict +:Default: MaxParallelStrategy +:Env var: +:Description: + Parallel strategy to use with the algorithm. + + + +---- + + +.. _config_worker: + +Worker +------ + +.. code-block:: yaml + + worker: + heartbeat: 120 + interrupt_signal_code: 130 + max_broken: 10 + max_idle_time: 60 + max_trials: 1000000000 + user_script_config: config + + + +.. _config_worker_heartbeat: + +heartbeat +~~~~~~~~~ + +:Type: int +:Default: 120 +:Env var: ORION_HEARTBEAT +:Description: + Frequency (seconds) at which the heartbeat of the trial is updated. If the heartbeat of a + `reserved` trial is larger than twice the configured heartbeat, Oríon will reset the status of + the trial to `interrupted`. This allows restoring lost trials (e.g., due to a killed worker). + + + +.. _config_worker_max_trials: + +max_trials +~~~~~~~~~~ + +:Type: int +:Default: 1000000000 +:Env var: ORION_WORKER_MAX_TRIALS +:Description: + Number of trials to be completed for this worker. If the experiment is completed, the worker + will die even if it did not reach its maximum number of trials. + + + +.. _config_worker_max_broken: + +max_broken +~~~~~~~~~~ + +:Type: int +:Default: 3 +:Env var: ORION_WORKER_MAX_BROKEN +:Description: + Maximum number of broken trials before the worker stops. + + + +.. _config_worker_max_idle_time: + +max_idle_time +~~~~~~~~~~~~~ + +:Type: int +:Default: 60 +:Env var: ORION_MAX_IDLE_TIME +:Description: + Maximum time the producer can spend trying to generate a new suggestion. Such timeouts are + generally caused by a slow database, a large number of concurrent workers leading to many race + conditions, or small search spaces with integer/categorical dimensions that may be fully + explored. + + + +..
+
+interrupt_signal_code
+~~~~~~~~~~~~~~~~~~~~~
+
+:Type: int
+:Default: 130
+:Env var: ORION_INTERRUPT_CODE
+:Description:
+    Signal returned by the user script to inform Oríon that it was interrupted.
+
+
+
+.. _config_worker_user_script_config:
+
+user_script_config
+~~~~~~~~~~~~~~~~~~
+
+:Type: str
+:Default: config
+:Env var: ORION_USER_SCRIPT_CONFIG
+:Description:
+    Config argument name of the user's script (``--config``).
+
+
+
+----
+
+
+.. _config_evc:
+
+Experiment Version Control
+--------------------------
+
+.. code-block:: yaml
+
+    evc:
+      algorithm_change: False
+      auto_resolution: True
+      cli_change_type: break
+      code_change_type: break
+      config_change_type: break
+      ignore_code_changes: False
+      manual_resolution: False
+      non_monitored_arguments: []
+
+
+
+.. _config_evc_auto_resolution:
+
+auto_resolution
+~~~~~~~~~~~~~~~
+
+.. warning::
+
+   **DEPRECATED.** This argument will be removed in v0.3.
+   See :ref:`evc: manual_resolution <config_evc_manual_resolution>` to avoid auto-resolution.
+
+:Type: bool
+:Default: True
+:Env var:
+:Description:
+    (DEPRECATED) This argument will be removed in v0.3. Conflicts are now resolved automatically by
+    default. See :ref:`evc: manual_resolution <config_evc_manual_resolution>` to avoid
+    auto-resolution.
+
+
+.. _config_evc_manual_resolution:
+
+manual_resolution
+~~~~~~~~~~~~~~~~~
+
+:Type: bool
+:Default: False
+:Env var: ORION_EVC_MANUAL_RESOLUTION
+:Description:
+    If ``True``, enter the experiment version control conflict resolver for manual resolution on
+    branching events. Otherwise, auto-resolution is attempted.
+
+
+
+.. _config_evc_non_monitored_arguments:
+
+non_monitored_arguments
+~~~~~~~~~~~~~~~~~~~~~~~
+
+:Type: list
+:Default: []
+:Env var: ORION_EVC_NON_MONITORED_ARGUMENTS
+:Description:
+    Ignore these commandline arguments when looking for differences in the user's commandline
+    call. The environment variable and the commandline support only one argument. Use the global
+    or local configuration to pass a list of arguments to ignore.
+
+
+
+.. _config_evc_ignore_code_changes:
+
+ignore_code_changes
+~~~~~~~~~~~~~~~~~~~
+
+:Type: bool
+:Default: False
+:Env var: ORION_EVC_IGNORE_CODE_CHANGES
+:Description:
+    If ``True``, ignore code changes when looking for differences.
+
+
+
+.. _config_evc_algorithm_change:
+
+algorithm_change
+~~~~~~~~~~~~~~~~
+
+:Type: bool
+:Default: False
+:Env var: ORION_EVC_ALGO_CHANGE
+:Description:
+    If ``True``, set algorithm change as resolved if a branching event occurs. Child and parent
+    experiments have access to all trials from each other when the only difference between them is
+    the algorithm configuration.
+
+
+.. _config_evc_code_change_type:
+
+code_change_type
+~~~~~~~~~~~~~~~~
+
+:Type: str
+:Default: break
+:Env var: ORION_EVC_CODE_CHANGE
+:Description:
+    One of ``break``, ``unsure`` or ``noeffect``. Defines how trials should be filtered in the
+    Experiment Version Control tree if there is a change in the user's code repository. If the
+    effect of the change is ``unsure``, the child experiment will access the trials of the parent
+    but not the other way around. This is to ensure the parent experiment does not get corrupted
+    with possibly incompatible results. The child cannot access the trials from the parent if
+    ``code_change_type`` is ``break``. The parent cannot access trials from the child if
+    ``code_change_type`` is ``unsure`` or ``break``.
+
+
+
+.. _config_evc_cli_change_type:
+
+cli_change_type
+~~~~~~~~~~~~~~~
+
+:Type: str
+:Default: break
+:Env var: ORION_EVC_CMDLINE_CHANGE
+:Description:
+    One of ``break``, ``unsure`` or ``noeffect``. Defines how trials should be filtered in the
+    Experiment Version Control tree if there is a change in the user's commandline call. If the
+    effect of the change is ``unsure``, the child experiment will access the trials of the parent
+    but not the other way around. This is to ensure the parent experiment does not get corrupted
+    with possibly incompatible results. The child cannot access the trials from the parent if
+    ``cli_change_type`` is ``break``. The parent cannot access trials from the child if
+    ``cli_change_type`` is ``unsure`` or ``break``.
+
+
+
+.. _config_evc_config_change_type:
+
+config_change_type
+~~~~~~~~~~~~~~~~~~
+
+:Type: str
+:Default: break
+:Env var: ORION_EVC_CONFIG_CHANGE
+:Description:
+    One of ``break``, ``unsure`` or ``noeffect``. Defines how trials should be filtered in the
+    Experiment Version Control tree if there is a change in the user's script. If the effect of
+    the change is ``unsure``, the child experiment will access the trials of the parent but not
+    the other way around. This is to ensure the parent experiment does not get corrupted with
+    possibly incompatible results. The child cannot access the trials from the parent if
+    ``config_change_type`` is ``break``. The parent cannot access trials from the child if
+    ``config_change_type`` is ``unsure`` or ``break``.
diff --git a/docs/src/user/evc.rst b/docs/src/user/evc.rst
index 18d3b7a85..54f3e44d5 100644
--- a/docs/src/user/evc.rst
+++ b/docs/src/user/evc.rst
@@ -24,7 +24,7 @@ omitted from the command samples.
 
 .. code-block:: bash
 
-    $ orion hunt -n orion-tutorial ./main.py --lr~'loguniform(1e-5, 1.0)' --momentum~'uniform(0, 1)'
+    $ orion hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' --momentum~'uniform(0, 1)'
 
 This cannot be the same as the experiment ``orion-tutorial`` since the space of optimization is
 now different. Such a call will trigger an experiment branching, meaning that a new experiment will
@@ -82,7 +82,7 @@ change our commandline like this.
 
 .. code-block:: bash
 
-    $ orion hunt -n orion-tutorial ./main.py --lr~'loguniform(1e-5, 1.0)' --momentum~+'uniform(0, 1)'
+    $ orion hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)' --momentum~+'uniform(0, 1)'
 
 Let's look back at the prompt above. Following the resolution of ``momentum`` conflict we see that
 it is now marked as resolved in the `Resolutions` list, while the experiment name is still
@@ -119,14 +119,14 @@ the prompt, and the resolution will be marked accordingly.
 
 .. code-block:: bash
 
-    $ orion hunt -n orion-tutorial -b orion-tutorial-with-momentum ./main.py --lr~'loguniform(1e-5, 1.0)' --momentum~+'uniform(0, 1)'
+    $ orion hunt -n orion-tutorial -b orion-tutorial-with-momentum python main.py --lr~'loguniform(1e-5, 1.0)' --momentum~+'uniform(0, 1)'
 
 You can execute again this branched experiment by reusing the same commandline but replacing the
 new experiment name ``orion-tutorial-with-momentum``.
 
 .. code-block:: bash
 
-    $ orion hunt -n orion-tutorial-with-momentum ./main.py --lr~'loguniform(1e-5, 1.0)' --momentum~'uniform(0, 1)'
+    $ orion hunt -n orion-tutorial-with-momentum python main.py --lr~'loguniform(1e-5, 1.0)' --momentum~'uniform(0, 1)'
 
 Or as always by only specifying the experiment name.
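+
+For instance, an illustrative sketch of that shortened call (assuming the branched experiment
+created above) would simply be:
+
+.. code-block:: bash
+
+    $ orion hunt -n orion-tutorial-with-momentum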
diff --git a/docs/src/user/library/evc_results.rst b/docs/src/user/library/evc_results.rst
index b39e45919..abf276684 100644
--- a/docs/src/user/library/evc_results.rst
+++ b/docs/src/user/library/evc_results.rst
@@ -1,28 +1,22 @@
 Iterative Results with EVC
 --------------------------
 
-When using the experiment version control (described `here `_),
+When using the experiment version control (described :doc:`here </user/evc>`),
 the experiments are connected in a tree structure which we call the EVC tree.
 
 You can retrieve results from different experiments with the EVC tree similarly
-as described in previous section. The only difference
-is we need to use :class:`EVCBuilder ` instead of
-:class:`ExperimentBuilder `.
-The :class:`EVCBuilder ` will connect the experiment
-to the EVC tree, accessible through the
-:attr:`node ` attribute.
-All trials of the tree can be fetched
+as described in the previous section. All trials of the tree can be fetched
 with the option
-:meth:`fetch_trials(with_evc_tree=True) `,
+:meth:`fetch_trials(with_evc_tree=True) <orion.client.experiment.ExperimentClient.fetch_trials>`;
+``with_evc_tree=False`` will only fetch the trials of the specific experiment.
 
 .. code-block:: python
 
     import pprint
 
-    from orion.core.io.evc_builder import EVCBuilder
-    experiment = EVCBuilder().build_view_from(
-        {"name": "orion-tutorial-with-momentum"})
+    from orion.client import create_experiment
+
+    experiment = create_experiment(name="orion-tutorial-with-momentum")
 
     print(experiment.name)
 
     pprint.pprint(experiment.stats)
diff --git a/docs/src/user/library/results.rst b/docs/src/user/library/results.rst
index ccdeea2f7..6b399f329 100644
--- a/docs/src/user/library/results.rst
+++ b/docs/src/user/library/results.rst
@@ -3,43 +3,39 @@ Results
 
 You can fetch experiments and trials using python code. There is no need to understand the
 specific database backend used (such as MongoDB) since you can fetch results using the
-:class:`orion.core.worker.experiment.Experiment` object.
-The class :class:`orion.core.io.experiment_builder.ExperimentBuilder`
-provides simple methods to fetch experiments
+:class:`orion.client.experiment.ExperimentClient` object.
+The helper function :py:func:`orion.client.create_experiment`
+provides a simple way to fetch experiments
 using their unique names. You do not need to explicitly open a connection to the database since it
 will automatically infer its configuration from the global configuration file as when calling Oríon
-in commandline. Otherwise you can pass other arguments to
-:meth:`ExperimentBuilder().build_view_from() \
-`.
-
-using the same dictionary structure as in the configuration file.
+in commandline. Otherwise you can specify the configuration directly to
+:py:func:`create_experiment() <orion.client.create_experiment>`. Take a look at the documentation
+for more details on all the configuration arguments that are supported.
 
 .. code-block:: python
 
     # Database automatically inferred
-    ExperimentBuilder().build_view_from(
-        {"name": "orion-tutorial"})
+    create_experiment(name="orion-tutorial")
 
     # Database manually set
-    ExperimentBuilder().build_view_from(
-        {"name": "orion-tutorial",
-         "dataset": {
-             "type": "mongodb",
-             "name": "myother",
-             "host": "localhost"}})
+    create_experiment(
+        name="orion-tutorial",
+        storage={
+            'type': 'legacy',
+            'database': {
+                'type': 'mongodb',
+                'name': 'myother',
+                'host': 'localhost'}})
 
 For a complete example, here is how you can fetch trials from a given experiment.
 
 .. code-block:: python
 
-    import datetime
     import pprint
 
-    from orion.core.io.experiment_builder import ExperimentBuilder
-
-    some_datetime = datetime.datetime.now() - datetime.timedelta(minutes=5)
+    from orion.client import create_experiment
 
-    experiment = ExperimentBuilder().build_view_from({"name": "orion-tutorial"})
+    experiment = create_experiment(name="orion-tutorial")
 
     pprint.pprint(experiment.stats)
 
@@ -55,7 +51,7 @@ For a complete example, here is how you can fetch trials from a given experiment
 
     for trial in experiment.fetch_trials_by_status('completed'):
         print(trial.objective)
 
-:class:`` has many methods that allows you to query
-for different trials. You can find them in the code
-.. _`mongodb-like syntax`: https://docs.mongodb.com/manual/reference/method/db.collection.find/
+:py:class:`ExperimentClient <orion.client.experiment.ExperimentClient>`
+has many methods that allow you to query
+for different trials. You can find them in the code reference section.
diff --git a/docs/src/user/overview.rst b/docs/src/user/overview.rst
new file mode 100644
index 000000000..ebe62ca40
--- /dev/null
+++ b/docs/src/user/overview.rst
@@ -0,0 +1,27 @@
+********
+Overview
+********
+
+.. The goal of this document is to provide an overview of the content present in the user guide.
+.. It is not meant to replace the general getting started or duplicate the content in the user guide
+.. sections.
+
+In this guide, we explain the concepts of Oríon in depth. A lightweight introduction is available
+in :doc:`/install/gettingstarted` to familiarize yourself with the project and its concepts.
+
+The user guide is organized in the following sections:
+
+* :doc:`/user/script`. Describes how you can integrate Oríon into your existing scripts and
+  experiments.
+* :doc:`/user/api`. Describes how to start and configure hyperparameter optimizations.
+* :doc:`/user/algorithms`. Describes our optimization algorithms and their options.
+* :doc:`/user/searchspace`. Describes the search space and its options.
+* :doc:`/user/monitoring`. Describes how to observe trials and view experiment results.
+* :doc:`/user/evc`. Describes how trials are organized and how you can reuse results from past
+  experiments.
+* :doc:`/user/storage`. Describes how to interact with the experiment database directly.
+* :doc:`/user/config`. Details in depth how the configuration system works.
+* :doc:`/user/parallel`. Describes how to run multiple workers for the same experiment.
+
+If you have any questions or feel something is missing in the documentation, always feel free to
+start a `new issue <https://github.com/Epistimio/orion/issues>`_ and tell us!
diff --git a/docs/src/user/parallel.rst b/docs/src/user/parallel.rst
new file mode 100644
index 000000000..986311919
--- /dev/null
+++ b/docs/src/user/parallel.rst
@@ -0,0 +1,34 @@
+****************
+Parallel Workers
+****************
+
+In this chapter, we describe how Oríon can be run on multiple cores or computers for the same
+optimization experiments.
+
+In most frameworks, a master-workers architecture is used. This implies that the master process
+must be instantiated either by the user or by a third-party provider, which incurs significant
+overhead for users and adds third-party dependencies -- often requiring an internet connection.
+
+Oríon has a different approach that nullifies these issues: we don't have a master process.
+Instead, the workers make decisions based on their shared common history stored in the database.
+The operations in the database are non-blocking, ensuring horizontal scalability for large search
+spaces.
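+
+For example, assuming a hypothetical experiment named ``exp`` (the script and its search space
+below are placeholders), a second worker can join the optimization simply by issuing the same
+command from another shell; both processes then coordinate through the database:
+
+.. code-block:: bash
+
+    # each of these calls, run concurrently, spawns one worker for the same experiment
+    $ orion hunt -n exp ./myscript.sh --lr~'loguniform(1e-5, 1.0)'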
+
+We illustrate below the workflow for a hyperparameter optimization with a single worker, typically
+executed on a personal laptop.
+
+.. figure:: /_resources/one.png
+   :alt: A single worker optimizing an experiment.
+   :align: center
+   :figclass: align-center
+
+More workers can be invoked by simply running the ``$ orion hunt -n exp ...`` command multiple
+times. Each call spawns a new worker for the given experiment. The workers' workflow is unchanged
+because workers are synchronized during the creation of a new trial, based on the trials already
+completed by the other workers.
+
+.. figure:: /_resources/synchronization.png
+   :alt: Multiple workers are synchronized while creating a new trial.
+   :align: center
+   :figclass: align-center
diff --git a/docs/src/user/script.rst b/docs/src/user/script.rst
index bf6772620..14fc6edf4 100644
--- a/docs/src/user/script.rst
+++ b/docs/src/user/script.rst
@@ -1,8 +1,8 @@
-******************
-Script Integration
-******************
+*********
+Integrate
+*********
 
-This section describes how to adapt the integration of the user script with Oríon.
+This section describes how to adapt a user script with Oríon.
 To customize how Oríon parses the commandline or execution environment see :ref:`customization`.
 If the user script requires information about the running trial, such as its id, the working
 directory or the experiment's name, look at :ref:`commandline_templates` or
@@ -23,7 +23,8 @@ this:
 
 .. code-block:: yaml
 
-    user_script_config: configuration
+    worker:
+        user_script_config: configuration
 
 It is then possible to run ``orion hunt`` like here:
 
@@ -54,6 +55,28 @@ the local one passed to created.
 To access the particular working directory of a trial, see next sections
 :ref:`commandline_templates` and :ref:`env_vars`.
 
+.. _language_compatibility:
+
+Language compatibility
+======================
+The command line works for scripts and programs in any language.
+The only requirement is that the executed script returns a JSON string with the objective value.
+
+The format is:
+
+.. code-block:: json
+
+    [
+        {
+            "name": "some-objective",
+            "type": "objective",
+            "value": 1
+        }
+    ]
+
+
+See :meth:`orion.client.report_results` for more details.
+
 .. _commandline_templates:
 
 Command-line templating
@@ -92,6 +115,9 @@ Templates Description
 ``trial.id``               Unique ID of the trial
 
 ``trial.working_dir``      Working dir of the trial
+
+``trial.hash_params``      md5sum hash for the parameters (w/o fidelity)
+
 ========================== ====================================
 
 .. note::
diff --git a/docs/src/user/searchspace.rst b/docs/src/user/searchspace.rst
index b59bfd293..0a1372631 100644
--- a/docs/src/user/searchspace.rst
+++ b/docs/src/user/searchspace.rst
@@ -110,7 +110,7 @@ Special arguments
 
 ex: ``uniform(0, 10, discrete=True)``
 
-Argument to cast a continous distribution into :ref:`integer-dim`. Defaults to ``False``.
+Argument to cast a continuous distribution into :ref:`integer-dim`. Defaults to ``False``.
 
 ``default_value``
 -----------------
@@ -122,6 +122,14 @@ without specifing this hyperparameter, assigning it the default value. This is a
 using the :ref:`EVC system`, so that experiments where an hyperparameter is deleted or added
 can adapt trials from other experiments by using the default value.
 
+``precision``
+-------------
+
+ex: ``loguniform(1e-5, 1e-2, precision=2)``
+
+Argument to sample a continuous distribution up to the requested precision. Defaults to ``4``.
+The above example would sample values such as 6.7e-4 or 2.9e-3 but not 6.789e-4.
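+
+For instance, a hypothetical call using this argument on the command line (the experiment name
+``exp`` and script ``./myscript.sh`` are placeholders) could look like:
+
+.. code-block:: bash
+
+    $ orion hunt -n exp ./myscript.sh --lr~'loguniform(1e-5, 1e-2, precision=2)'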
+
 ``shape``
 ---------
diff --git a/docs/src/user/storage.rst b/docs/src/user/storage.rst
new file mode 100644
index 000000000..fcee9d3ac
--- /dev/null
+++ b/docs/src/user/storage.rst
@@ -0,0 +1,325 @@
+.. role:: hidden
+    :class: hidden-section
+
+
+.. _storage:
+
+*******
+Storage
+*******
+
+In short, users are expected to only use the
+:py:class:`ExperimentClient <orion.client.experiment.ExperimentClient>` to interact
+with the storage client, to fetch and register trials. Creation of experiments
+should always be done through
+:py:func:`create_experiment() <orion.client.create_experiment>`.
+
+If you need to access the storage with more flexibility, you can do
+so using the methods of the storage client directly. See the :ref:`storage_backend` section
+for more details.
+
+Finally, legacy databases supported by Oríon can also be accessed directly as a last
+resort if the storage backend is not flexible enough. See the :ref:`database_backend` section
+for more details.
+
+.. _experiment_client:
+
+ExperimentClient
+================
+
+The experiment client must be created with the helper function
+:py:func:`create_experiment() <orion.client.create_experiment>` which will take care of
+initializing the storage backend and will create a new experiment if it is non-existent, or
+simply load the corresponding experiment from the storage.
+
+There is a small subset of methods to fetch trials or create new ones. We focus here
+on the methods for loading or creating trials in particular; see
+:py:class:`ExperimentClient <orion.client.experiment.ExperimentClient>` for documentation
+of all methods.
+
+Here is a short example to fetch trials or insert a new one.
+
+.. code-block:: python
+
+    from orion.client import create_experiment
+
+    # Create the ExperimentClient
+    experiment = create_experiment('exp-name', space=dict(x='uniform(0, 1)'))
+
+    # To fetch all trials from an experiment
+    trials = experiment.fetch_trials()
+
+    # Insert a new trial in storage
+    experiment.insert(dict(x=0.5))
+
+    # Insert a new trial and reserve to execute
+    trial = experiment.insert(dict(x=0.6), reserve=True)
+
+:hidden:`fetch_trials`
+----------------------
+
+.. automethod:: orion.client.experiment.ExperimentClient.fetch_trials
+   :noindex:
+
+:hidden:`fetch_trials_by_status`
+--------------------------------
+
+.. automethod:: orion.client.experiment.ExperimentClient.fetch_trials_by_status
+   :noindex:
+
+:hidden:`fetch_noncompleted_trials`
+-----------------------------------
+
+.. automethod:: orion.client.experiment.ExperimentClient.fetch_noncompleted_trials
+   :noindex:
+
+:hidden:`get_trial`
+-------------------
+
+.. automethod:: orion.client.experiment.ExperimentClient.get_trial
+   :noindex:
+
+:hidden:`insert`
+----------------
+
+.. automethod:: orion.client.experiment.ExperimentClient.insert
+   :noindex:
+
+
+
+.. _storage_backend:
+
+Storage
+=======
+
+.. warning::
+
+    The storage backends are not meant to be used directly by users.
+    Be careful if you use any method which modifies the data in storage, or
+    you may break your experiment or trials.
+
+The storage backend is used by the
+:py:class:`ExperimentClient <orion.client.experiment.ExperimentClient>`
+to read and write persistent records of the experiment and trials.
+Although we recommend using the experiment client,
+we document the storage backend here for users who may need
+more flexibility.
+
+There are two ways of creating the storage client. If you
+already created an experiment client, the storage client
+was already created during the process of creating the
+experiment client and you can get it with
+:py:func:`orion.storage.base.get_storage`.
+Otherwise, you can create the storage client with
+:py:func:`orion.storage.base.setup_storage` before
+fetching it with
+:py:func:`get_storage() <orion.storage.base.get_storage>`.
+To recap, you can create it indirectly with
+:py:func:`create_experiment() <orion.client.create_experiment>`
+or directly with
+:py:func:`setup_storage() <orion.storage.base.setup_storage>`.
+In both cases, you can access it with
+:py:func:`get_storage() <orion.storage.base.get_storage>`.
+
+.. code-block:: python
+
+    from orion.client import create_experiment
+    from orion.storage.base import get_storage, setup_storage
+
+    # Create the ExperimentClient and storage implicitly
+    experiment = create_experiment('exp-name', space=dict(x='uniform(0, 1)'))
+
+    # Or create storage explicitly using setup_storage
+    setup_storage(dict(
+        type='legacy',
+        database=dict(
+            type='pickleddb',
+            host='db.pkl')
+        )
+    )
+
+    # Get the storage client
+    storage = get_storage()
+
+    # fetch trials
+    trials = storage.fetch_trials(uid=experiment.id)
+
+    # Update trial status
+    storage.set_trial_status(trials[0], 'interrupted')
+
+.. note::
+
+   The function :py:func:`setup_storage() <orion.storage.base.setup_storage>`
+   reads the global configuration like
+   :py:func:`create_experiment() <orion.client.create_experiment>`
+   does if there is missing information. Therefore, it is possible
+   to call it without any argument the same way it is possible
+   to call
+   :py:func:`create_experiment() <orion.client.create_experiment>`
+   without specifying the storage configuration.
+
+:hidden:`update_experiment`
+---------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.update_experiment
+   :noindex:
+
+:hidden:`fetch_experiments`
+---------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.fetch_experiments
+   :noindex:
+
+:hidden:`register_trial`
+------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.register_trial
+   :noindex:
+
+:hidden:`reserve_trial`
+-----------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.reserve_trial
+   :noindex:
+
+:hidden:`fetch_trials`
+----------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.fetch_trials
+   :noindex:
+
+:hidden:`get_trial`
+-------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.get_trial
+   :noindex:
+
+:hidden:`fetch_lost_trials`
+---------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.fetch_lost_trials
+   :noindex:
+
+:hidden:`fetch_pending_trials`
+------------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.fetch_pending_trials
+   :noindex:
+
+:hidden:`fetch_noncompleted_trials`
+-----------------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.fetch_noncompleted_trials
+   :noindex:
+
+:hidden:`fetch_trials_by_status`
+--------------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.fetch_trials_by_status
+   :noindex:
+
+:hidden:`count_completed_trials`
+--------------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.count_completed_trials
+   :noindex:
+
+:hidden:`count_broken_trials`
+-----------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.count_broken_trials
+   :noindex:
+
+:hidden:`set_trial_status`
+--------------------------
+
+.. automethod:: orion.storage.base.BaseStorageProtocol.set_trial_status
+   :noindex:
+
+
+.. _database_backend:
+
+Database
+========
+
+.. warning::
+
+    The database backends are not meant to be used directly by users.
+    Be careful if you use any method which modifies the data in the database, or
+    you may break your experiment or trials.
+
+Initially, the database backend was the only storage solution supported.
+An additional abstraction layer, the storage protocol,
+has been added with the goal of supporting various storage types
+such as third-party experiment management platforms which
+could not be supported using the basic methods ``read``
+and ``write``.
+This is why the database backend has been turned into
+a legacy storage protocol. Because it is the default
+storage protocol, we document it here for users
+who may need even more flexibility than what the
+storage protocol provides.
+
+There are two ways of creating the database client. If you
+already created an experiment client, the database client
+was already created during the process of creating the
+experiment client and you can get it with
+:py:func:`orion.storage.legacy.get_database`.
+Otherwise, you can create the database client with
+:py:func:`orion.storage.legacy.setup_database` before
+fetching it with
+:py:func:`get_database() <orion.storage.legacy.get_database>`.
+To recap, you can create it indirectly with
+:py:func:`create_experiment() <orion.client.create_experiment>`
+or directly with
+:py:func:`setup_database() <orion.storage.legacy.setup_database>`.
+In both cases, you can access it with
+:py:func:`get_database() <orion.storage.legacy.get_database>`.
+
+Here is an example of how you can remove an experiment:
+
+.. code-block:: python
+
+    from orion.client import create_experiment
+    from orion.storage.legacy import get_database, setup_database
+
+    # Create the ExperimentClient and database implicitly
+    experiment = create_experiment('exp-name', space=dict(x='uniform(0, 1)'))
+
+    # Or create the database explicitly using setup_database
+    setup_database(dict(
+        type='pickleddb',
+        host='db.pkl'
+        )
+    )
+
+    # This gets the db singleton that was already instantiated within the experiment object.
+    db = get_database()
+
+    # To remove all trials of an experiment
+    db.remove('trials', dict(experiment=experiment.id))
+
+    # To remove the experiment
+    db.remove('experiments', dict(_id=experiment.id))
+
+
+:hidden:`read`
+--------------
+
+.. automethod:: orion.core.io.database.Database.read
+
+:hidden:`write`
+---------------
+
+.. automethod:: orion.core.io.database.Database.write
+
+:hidden:`remove`
+----------------
+
+.. automethod:: orion.core.io.database.Database.remove
+
+:hidden:`read_and_write`
+------------------------
+
+.. automethod:: orion.core.io.database.Database.read_and_write
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 000000000..9da1fcf9c
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,5 @@
+# Examples
+
+In this folder, you will find the source code used in the examples from the [documentation](https://orion.readthedocs.io/en/develop/index.html). The scripts are ready to be used with Oríon right away. Use them as inspiration for your projects or copy them directly into your development environment!
+
+Each folder corresponds to one example. Feel free to add some of your own!
\ No newline at end of file
diff --git a/examples/scikitlearn-iris/README.md b/examples/scikitlearn-iris/README.md
new file mode 100644
index 000000000..9b3e04572
--- /dev/null
+++ b/examples/scikitlearn-iris/README.md
@@ -0,0 +1,8 @@
+# Scikit-learn example on the iris dataset
+This folder contains a simple example script (main.py) used to showcase the simplicity of integrating
+ Oríon into an existing workflow. We encourage reading the example in the [documentation](https://orion.readthedocs.io/en/stable/tutorials/scikit-learn.html).
+
+## Pre-requisites
+- Install the dependencies: `$ pip install -r requirements.txt`
+- Configure the Oríon database ([documentation](https://orion.readthedocs.io/en/stable/install/database.html))
+
diff --git a/examples/scikitlearn-iris/main.py b/examples/scikitlearn-iris/main.py
new file mode 100755
index 000000000..3ab3b62a4
--- /dev/null
+++ b/examples/scikitlearn-iris/main.py
@@ -0,0 +1,32 @@
+import sys
+
+import numpy as np
+from orion.client import report_objective
+from sklearn.datasets import load_iris
+from sklearn.linear_model import SGDClassifier
+from sklearn.metrics import balanced_accuracy_score
+from sklearn.model_selection import train_test_split
+
+# Make the execution reproducible
+np.random.seed(1)
+
+# Parsing the value for the hyper-parameter 'epsilon' given as a command line argument.
+hyper_epsilon = sys.argv[1]
+print("Epsilon is {}".format(hyper_epsilon))
+
+# Loading the iris dataset and splitting it into training and testing sets.
+X, y = load_iris(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
+
+# Training the model on the training set with the specified 'epsilon' to control the huber loss.
+clf = SGDClassifier(loss='huber', epsilon=float(hyper_epsilon))
+clf.fit(X_train, y_train)
+
+# Evaluating the accuracy using the testing set.
+y_pred = clf.predict(X_test)
+accuracy = balanced_accuracy_score(y_test, y_pred)
+
+# Reporting the results
+print("Accuracy is {}".format(accuracy))
+
+report_objective(1 - accuracy)
diff --git a/examples/scikitlearn-iris/requirements.txt b/examples/scikitlearn-iris/requirements.txt
new file mode 100644
index 000000000..cfd3c8cec
--- /dev/null
+++ b/examples/scikitlearn-iris/requirements.txt
@@ -0,0 +1,2 @@
+scikit-learn
+orion
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 1b57b73a7..223c44dae 100644
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@
 tests_require = [
-    'pytest>=3.0.0'
+    'pytest>=3.0.0',
+    'scikit-learn'
     ]
 
@@ -43,9 +44,12 @@
         'OptimizationAlgorithm': [
             'random = orion.algo.random:Random',
             'asha = orion.algo.asha:ASHA',
+            'hyperband = orion.algo.hyperband:Hyperband',
+            'tpe = orion.algo.tpe:TPE',
         ],
         'Storage': [
-            'legacy = orion.storage.legacy:Legacy'
+            'track = orion.storage.track:Track',
+            'legacy = orion.storage.legacy:Legacy',
         ]
     },
     install_requires=['PyYAML', 'pymongo>=3', 'numpy', 'scipy', 'gitpython', 'filelock',
@@ -79,7 +83,7 @@
         'Topic :: Scientific/Engineering',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
     ] + [('Programming Language :: Python :: %s' % x)
-         for x in '3 3.5 3.6 3.7'.split()]
+         for x in '3 3.6 3.7 3.8'.split()]
 
 if __name__ == '__main__':
     setup(**setup_args)
diff --git a/src/orion/algo/asha.py b/src/orion/algo/asha.py
index e92b6512c..c6b4994e7 100644
--- a/src/orion/algo/asha.py
+++ b/src/orion/algo/asha.py
@@ -27,11 +27,34 @@
 """
 SPACE_ERROR = """
-ASHA cannot be used if space does contain a fidelity dimension.
+ASHA can only be used if there is one fidelity dimension.
 For more information on the configuration and usage of ASHA, see
 https://orion.readthedocs.io/en/develop/user/algorithms.html#asha
 """
 
+BUDGET_ERROR = """
+Cannot build budgets below max_resources;
+(max: {}) - (min: {}) > (num_rungs: {})
+"""
+
+
+def compute_budgets(min_resources, max_resources, reduction_factor, num_rungs):
+    """Compute the budgets used for ASHA"""
+    budgets = numpy.logspace(
+        numpy.log(min_resources) / numpy.log(reduction_factor),
+        numpy.log(max_resources) / numpy.log(reduction_factor),
+        num_rungs, base=reduction_factor)
+    budgets = (budgets + 0.5).astype(int)
+
+    for i in range(num_rungs - 1):
+        if budgets[i] >= budgets[i + 1]:
+            budgets[i + 1] = budgets[i] + 1
+
+    if budgets[-1] > max_resources:
+        raise ValueError(BUDGET_ERROR.format(max_resources, min_resources, num_rungs))
+
+    return list(budgets)
+
 
 class ASHA(BaseAlgorithm):
     """Asynchronous Successive Halving Algorithm
@@ -122,12 +145,14 @@ def __init__(self, space, seed=None, grace_period=None, max_resources=None,
 
         self.num_rungs = num_rungs
 
-        budgets = numpy.logspace(
-            numpy.log(min_resources) / numpy.log(reduction_factor),
-            numpy.log(max_resources) / numpy.log(reduction_factor),
-            num_rungs, base=reduction_factor).astype(int)
+        budgets = compute_budgets(min_resources, max_resources, reduction_factor, num_rungs)
 
         # Tracks state for new trial add
+        if num_brackets > num_rungs:
+            logger.warning("The input num_brackets %i is larger than the number of rungs %i; "
+                           "setting num_brackets to %i", num_brackets, num_rungs, num_rungs)
+            num_brackets = num_rungs
+
         self.brackets = [
             Bracket(self, reduction_factor, budgets[bracket_index:])
             for bracket_index in range(num_brackets)
@@ -174,20 +199,10 @@
             logger.debug('Promoting')
             return [candidate]
 
-        if all(bracket.is_filled for bracket in self.brackets):
-            logger.debug('All brackets are filled.')
+        point = self._grow_point_for_bottom_rung()
+        if not point:
             return None
 
-        for _attempt in range(100):
-            point = list(self.space.sample(1, seed=tuple(self.rng.randint(0, 1000000, size=3)))[0])
-            if self.get_id(point) not in self.trial_info:
-                break
-
-        if self.get_id(point) in self.trial_info:
-            raise RuntimeError(
-                'ASHA keeps sampling already existing points. This should not happen, '
-                'please report this error to https://github.com/Epistimio/orion/issues')
-
         sizes = numpy.array([len(b.rungs) for b in self.brackets])
         probs = numpy.e**(sizes - sizes.max())
         probs = numpy.array([prob * int(not bracket.is_filled)
@@ -201,6 +216,34 @@
 
         return [tuple(point)]
 
+    def _grow_point_for_bottom_rung(self):
+        """Sample a point for the bottom rung"""
+        if all(bracket.is_filled for bracket in self.brackets):
+            logger.warning('All brackets are filled.')
+            return None
+
+        for _attempt in range(100):
+            point = list(self.space.sample(1, seed=tuple(self.rng.randint(0, 1000000, size=3)))[0])
+            if self.get_id(point) not in self.trial_info:
+                break
+
+        num_sample_trials = 0
+        if self.get_id(point) in self.trial_info:
+            for bracket in self.brackets:
+                num_sample_trials += len(bracket.rungs[0][1])
+
+            if num_sample_trials >= self.space.cardinality:
+                logger.warning('The number of unique trials of bottom rungs exceeds the search '
+                               'space cardinality %i, ASHA algorithm exits.',
+                               self.space.cardinality)
+                return None
+            else:
+                raise RuntimeError(
+                    'ASHA keeps sampling already existing points. This should not happen, '
+                    'please report this error to https://github.com/Epistimio/orion/issues')
+
+        return point
+
     def get_id(self, point):
         """Compute a unique hash for a point based on params, but not fidelity level."""
         _point = list(point)
diff --git a/src/orion/algo/base.py b/src/orion/algo/base.py
index cd9311ca9..80e2d579c 100644
--- a/src/orion/algo/base.py
+++ b/src/orion/algo/base.py
@@ -10,6 +10,7 @@
 """
 from abc import (ABCMeta, abstractmethod)
+import hashlib
 import logging
 
 from orion.core.utils import Factory
@@ -17,10 +18,23 @@
 log = logging.getLogger(__name__)
 
 
+def infer_trial_id(point):
+    """Compute a hash of a point"""
+    return hashlib.md5(str(list(point)).encode('utf-8')).hexdigest()
+
+
 # pylint: disable=too-many-public-methods
 class BaseAlgorithm(object, metaclass=ABCMeta):
     """Base class describing what an algorithm can do.
 
+    Parameters
+    ----------
+    space : `orion.algo.space.Space`
+       Definition of a problem's parameter space.
+    kwargs : dict
+       Tunable elements of a particular algorithm, a dictionary from
+       hyperparameter names to values.
+
     Notes
     -----
    We are using the No Free Lunch theorem's [1]_[3]_ formulation of an
@@ -85,19 +99,9 @@
     requires = []
 
     def __init__(self, space, **kwargs):
-        """Declare problem's parameter space and set up algo's hyperparameters.
-
-        Parameters
-        ----------
-        space : `orion.algo.space.Space`
-           Definition of a problem's parameter space.
-        kwargs : dict
-           Tunable elements of a particular algorithm, a dictionary from
-           hyperparameter names to values.
-
-        """
         log.debug("Creating Algorithm object of %s type with parameters:\n%s",
                   type(self).__name__, kwargs)
+        self._trials_info = {}  # Stores Unique Trial -> Result
         self._space = space
         self._param_names = list(kwargs.keys())
         # Instantiate tunable parameters of an algorithm
@@ -130,14 +134,14 @@
     @property
     def state_dict(self):
         """Return a state dict that can be used to reset the state of the algorithm."""
-        return {}
+        return {'_trials_info': self._trials_info}
 
     def set_state(self, state_dict):
        """Reset the state of the algorithm based on the given state_dict
 
        :param state_dict: Dictionary representing state of an algorithm
        """
-        pass
+        self._trials_info = state_dict.get('_trials_info')
 
     @abstractmethod
     def suggest(self, num=1):
@@ -162,7 +166,6 @@
         """
         pass
 
-    @abstractmethod
     def observe(self, points, results):
         """Observe the `results` of the evaluation of the `points` in the
         process defined in user's script.
@@ -188,11 +191,24 @@
            or equal to zero by the problem's definition.
 
         """
-        pass
+        for point, result in zip(points, results):
+            point_id = infer_trial_id(point)
+
+            if point_id not in self._trials_info:
+                self._trials_info[point_id] = (point, result)
 
     @property
     def is_done(self):
-        """Return True, if an algorithm holds that there can be no further improvement."""
+        """Return True, if an algorithm holds that there can be no further improvement.
+        By default, the cardinality of the specified search space will be used to check
+        if all possible sets of parameters have been tried.
+ """ + if len(self._trials_info) >= self.space.cardinality: + return True + + if len(self._trials_info) >= getattr(self, 'max_trials', float('inf')): + return True + return False def score(self, point): # pylint:disable=no-self-use,unused-argument diff --git a/src/orion/algo/hyperband.py b/src/orion/algo/hyperband.py new file mode 100644 index 000000000..d9a1b1d8c --- /dev/null +++ b/src/orion/algo/hyperband.py @@ -0,0 +1,520 @@ +# -*- coding: utf-8 -*- +""" +:mod:`orion.algo.hyperband` -- A Novel Bandit-Based Approach to Hyperparameter Optimization +=========================================================================================== + +.. module:: hyperband + :platform: Unix + :synopsis: Implement Hyperband to exploit configurations with fixed resource efficiently + +""" +import copy +import hashlib +import logging + +import numpy + +from orion.algo.base import BaseAlgorithm +from orion.algo.space import Fidelity + +logger = logging.getLogger(__name__) + +REGISTRATION_ERROR = """ +Bad fidelity level {fidelity}. Should be in {budgets}. +Params: {params} +""" + +SPACE_ERROR = """ +Hyperband cannot be used if space does not contain a fidelity dimension. +For more information on the configuration and usage of Hyperband, see +https://orion.readthedocs.io/en/develop/user/algorithms.html#hyperband +""" + +BUDGET_ERROR = """ +Cannot build budgets below max_resources; +(max: {}) - (min: {}) > (num_rungs: {}) +""" + + +def compute_budgets(max_resources, reduction_factor): + """Compute the budgets used for each execution of hyperband""" + num_brackets = int(numpy.log(max_resources) / numpy.log(reduction_factor)) + budgets = [] + budgets_tab = {} # just for display consideration + for bracket_id in range(0, num_brackets + 1): + bracket_budgets = [] + num_trials = int(numpy.ceil(int((num_brackets + 1) / (num_brackets - bracket_id + 1)) * + (reduction_factor ** (num_brackets - bracket_id)))) + + min_resources = max_resources / reduction_factor ** (num_brackets - bracket_id) + for i in range(0, num_brackets - bracket_id + 1): + n_i = int(num_trials / reduction_factor ** i) + min_i = int(min_resources * reduction_factor ** i) + bracket_budgets.append((n_i, min_i)) + + if budgets_tab.get(i): + budgets_tab[i].append((n_i, min_i)) + else: + budgets_tab[i] = [(n_i, min_i)] + + budgets.append(bracket_budgets) + + display_budgets(budgets_tab, max_resources, reduction_factor) + + return budgets + + +def display_budgets(budgets_tab, max_resources, reduction_factor): + """Display hyperband budget as a table in debug log""" + num_brackets = len(budgets_tab[0]) + table_str = 'Display Budgets:\n' + col_format_str = '{:<4}' + ' {:<12}' * num_brackets + '\n' + col_title_list = ['i '] + ['n_i r_i'] * num_brackets + col_sub_list = ['---'] + ['---------'] * num_brackets + table_str += col_format_str.format(*col_sub_list) + table_str += col_format_str.format(*col_title_list) + table_str += col_format_str.format(*col_sub_list) + + total_trials = 0 + for key, values in budgets_tab.items(): + table_row = '{:<4} '.format(key) + for value in values: + n_i, r_i = value + total_trials += n_i + st = '{:<5} {:<7}'.format(n_i, r_i) + table_row += st + table_str += table_row + '\n' + table_str += col_format_str.format(*col_sub_list) + table_str += 'max resource={}, eta={}, trials number of one execution={}\n' \ + .format(max_resources, reduction_factor, total_trials) + logger.debug(table_str) + + +class Hyperband(BaseAlgorithm): + """Hyperband formulates hyperparameter optimization as a pure-exploration non-stochastic + 
+    infinite-armed bandit problem where a predefined resource like iterations, data samples,
+    or features is allocated to randomly sampled configurations.
+
+    For more information on the algorithm,
+    see original paper at http://jmlr.org/papers/v18/16-558.html.
+
+    Li, Lisha et al. "Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization"
+    Journal of Machine Learning Research, 18:1-52, 2018.
+
+    Parameters
+    ----------
+    space: `orion.algo.space.Space`
+        Optimisation space with priors for each dimension.
+    seed: None, int or sequence of int
+        Seed for the random number generator used to sample new trials.
+        Default: ``None``
+    repetitions: int
+        Number of executions of Hyperband. The default is ``numpy.inf``, which means
+        Hyperband runs until no new trials can be suggested.
+
+    """
+
+    def __init__(self, space, seed=None, repetitions=numpy.inf):
+        self.brackets = []
+        super(Hyperband, self).__init__(space, seed=seed, repetitions=repetitions)
+
+        self.trial_info_wo_fidelity = {}  # Stores Point id (with no fidelity) -> Bracket
+
+        self.points_in_suggest_call = {}
+
+        try:
+            fidelity_index = self.fidelity_index
+        except IndexError:
+            raise RuntimeError(SPACE_ERROR)
+
+        fidelity_dim = space.values()[fidelity_index]
+
+        self.max_resources = fidelity_dim.high
+        self.reduction_factor = fidelity_dim.base
+
+        if self.reduction_factor < 2:
+            raise AttributeError("Reduction factor for Hyperband needs to be at least 2.")
+
+        self.repetitions = repetitions
+
+        # Counter for how many times Hyperband has been executed
+        self.executed_times = 0
+
+        self.budgets = compute_budgets(self.max_resources, self.reduction_factor)
+
+        self.brackets = [
+            Bracket(self, bracket_budgets, 1)
+            for bracket_budgets in self.budgets
+        ]
+
+        self.seed_rng(seed)
+
+    def sample(self, num, bracket, buffer=10):
+        """Sample new points from the bracket"""
+        sample_buffer = bracket.rungs[0]['n_trials'] * buffer
+        samples = self.space.sample(sample_buffer, seed=bracket.seed)
+        i = 0
+        points = []
+        while len(points) < num and i < sample_buffer:
+            point = samples[i]
+
+            _id_wo_fidelity = self.get_id(point)
+            _bracket_observed = self.trial_info_wo_fidelity.get(_id_wo_fidelity)
+
+            if _id_wo_fidelity not in self.points_in_suggest_call and \
+                    (not _bracket_observed or
+                     (_bracket_observed.repetition_id < bracket.repetition_id and
+                      _bracket_observed.get_point_max_resource(point) <
+                      bracket.rungs[0]['resources'])):
+                # No duplicate found, or the duplicate exists in a previous Hyperband
+                # execution with fewer resources
+
+                point = list(point)
+                point[self.fidelity_index] = bracket.rungs[0]['resources']
+                points.append(tuple(point))
+
+                self.points_in_suggest_call[_id_wo_fidelity] = bracket
+
+            i += 1
+
+        return points
+
+    def seed_rng(self, seed):
+        """Seed the state of the random number generator.
+
+        :param seed: Integer seed for the random number generator.
+        """
+        self.seed = seed
+        for i, bracket in enumerate(self.brackets):
+            bracket.seed_rng(seed + i if seed is not None else None)
+        self.rng = numpy.random.RandomState(seed)
+
+    @property
+    def state_dict(self):
+        """Return a state dict that can be used to reset the state of the algorithm."""
+        return {'rng_state': self.rng.get_state(), 'seed': self.seed,
+                'executed_times': self.executed_times}
+
+    def set_state(self, state_dict):
+        """Reset the state of the algorithm based on the given state_dict
+
+        :param state_dict: Dictionary representing state of an algorithm
+        """
+        self.seed_rng(state_dict['seed'])
+        self.rng.set_state(state_dict['rng_state'])
+        self.executed_times = state_dict['executed_times']
+
+    def suggest(self, num=1):
+        """Suggest a number of new sets of parameters.
+
+        Sample new points until the first rung is filled. Afterwards,
+        wait for all trials to be completed before promoting trials
+        to the next rung.
+
+        Parameters
+        ----------
+        num: int, optional
+            Number of points to suggest. Defaults to 1.
+
+        Returns
+        -------
+        list of points or None
+            A list of lists representing points suggested by the algorithm. The algorithm may opt
+            out if it cannot make a good suggestion at the moment (it may be waiting for other
+            trials to complete), in which case it will return None.
+
+        """
+        samples = []
+        # Temporarily persist the points proposed inside this call
+        self.points_in_suggest_call = {}
+        for bracket in reversed(self.brackets):
+            if not bracket.is_filled:
+                samples += bracket.sample()
+
+        if samples:
+            return samples
+
+        # All brackets are filled
+
+        for bracket in reversed(self.brackets):
+            if bracket.is_ready() and not bracket.is_done:
+                samples += bracket.promote()
+
+        if samples:
+            return samples
+
+        # Either all brackets are done, or none are ready and the algorithm needs to wait for
+        # some trials to complete
+        if len(self.trial_info_wo_fidelity) >= self.space.cardinality:
+            logger.warning('The number of unique trials of bottom rungs exceeds the search space '
+                           'cardinality %i, Hyperband algorithm exits.', self.space.cardinality)
+        else:
+            logger.warning('Hyperband cannot suggest new samples, exiting.')
+
+        return None
+
+    def get_id(self, point, ignore_fidelity=True):
+        """Compute a unique hash for a point based on params, without fidelity level by default."""
+        _point = list(point)
+        if ignore_fidelity:
+            non_fidelity_dims = _point[0:self.fidelity_index]
+            non_fidelity_dims.extend(_point[self.fidelity_index + 1:])
+            _point = non_fidelity_dims
+
+        return hashlib.md5(str(_point).encode('utf-8')).hexdigest()
+
+    def _refresh_bracket(self):
+        """Refresh the brackets if one Hyperband execution is done"""
+        if all(bracket.is_done for bracket in self.brackets):
+            self.executed_times = self.brackets[0].repetition_id
+            logger.debug('Hyperband execution %i is done, required to execute %s times',
+                         self.executed_times, str(self.repetitions))
+
+            # Continue to the next execution if needed
+            if self.executed_times < self.repetitions:
+                self.brackets = [
+                    Bracket(self, bracket_budgets, self.executed_times + 1)
+                    for bracket_budgets in self.budgets
+                ]
+                if self.seed is not None:
+                    self.seed += 1
+
+    def _get_bracket(self, point):
+        """Get the bracket of a point during observe"""
+        fidelity = point[self.fidelity_index]
+        _id_wo_fidelity = self.get_id(point)
+
+        brackets = []
+        for bracket in self.brackets:
+            # If the same point is found in the first rung of a bracket,
+            # the point should be registered in that bracket
+            if _id_wo_fidelity in bracket.rungs[0]['results']:
+                brackets = [bracket]
+                break
+
+        if not brackets:
+            # If the point shows up in the current Hyperband execution for the first time,
+            # the bracket with the same fidelity in the first rung should be used.
+            # The assumption is that there are no duplicated points inside the same Hyperband
+            # execution.
+            brackets = [bracket for bracket in self.brackets
+                        if bracket.rungs[0]['resources'] == fidelity]
+
+        if not brackets:
+            raise ValueError("No bracket found for point {0} with fidelity {1}"
+                             .format(_id_wo_fidelity, fidelity))
+
+        if len(brackets) > 1:
+            logger.warning("More than one bracket found for point %s, this should not happen",
+                           str(point))
+
+        bracket = brackets[0]
+
+        return bracket
+
+    def observe(self, points, results):
+        """Observe evaluation `results` corresponding to a list of `points` in
+        the space.
+        """
+        for point, result in zip(points, results):
+
+            bracket = self._get_bracket(point)
+
+            try:
+                bracket.register(point, result['objective'])
+            except IndexError:
+                logger.warning('Point registered to wrong bracket. This is likely due '
+                               'to a corrupted database, where trials of different fidelity '
+                               'have wrong timestamps.')
+                continue
+
+            _id_wo_fidelity = self.get_id(point)
+            if _id_wo_fidelity not in self.trial_info_wo_fidelity or \
+                    bracket.get_point_max_resource(point) > \
+                    self.trial_info_wo_fidelity[_id_wo_fidelity].get_point_max_resource(point):
+                # If the point shows up for the first time, or it has a bigger resource value
+                # compared with the same point in a previous Hyperband execution
+                self.trial_info_wo_fidelity[_id_wo_fidelity] = bracket
+
+        self._refresh_bracket()
+
+    @property
+    def is_done(self):
+        """Return True, if all required executions have been done."""
+        if self.executed_times >= self.repetitions:
+            return True
+        return False
+
+    @property
+    def fidelity_index(self):
+        """Compute the index of the point where the fidelity dimension is."""
+        def _is_fidelity(dim):
+            return (isinstance(dim, Fidelity) or
+                    (hasattr(dim, 'original_dimension') and
+                     isinstance(dim.original_dimension, Fidelity)))
+
+        return [i for i, dim in enumerate(self.space.values()) if _is_fidelity(dim)][0]
+
+
+class Bracket():
+    """Bracket of rungs for the algorithm Hyperband.
+
+    Parameters
+    ----------
+    hyperband: `Hyperband` algorithm
+        The hyperband algorithm object which this bracket will be part of.
+    budgets: list of tuple
+        Each tuple gives the (n_trials, resource_budget) for the respective rung.
+ repetition_id: int + The id of hyperband execution this bracket belongs to + + """ + + def __init__(self, hyperband, budgets, repetition_id): + self.hyperband = hyperband + self.rungs = [dict(resources=budget, n_trials=n_trials, results=dict()) + for n_trials, budget in budgets] + self.seed = None + self.repetition_id = repetition_id + + logger.debug('Bracket budgets: %s', str(budgets)) + + # points = hyperband.sample(compute_rung_sizes(reduction_factor, len(budgets))[0]) + # for point in points: + # self.register(point, None) + + @property + def is_filled(self): + """Return True if first rung with trials is filled""" + return self.has_rung_filled(0) + + def get_point_max_resource(self, point): + """Return the max resource value that has been tried for a point""" + max_resource = 0 + _id_wo_fidelity = self.hyperband.get_id(point) + for rung in self.rungs: + if _id_wo_fidelity in rung['results']: + max_resource = rung['resources'] + + return max_resource + + def sample(self): + """Sample a new trial with lowest fidelity""" + should_have_n_trials = self.rungs[0]['n_trials'] + n_trials = len(self.rungs[0]['results']) + return self.hyperband.sample(should_have_n_trials - n_trials, self) + + def register(self, point, objective): + """Register a point in the corresponding rung""" + fidelity = point[self.hyperband.fidelity_index] + rungs = [rung['results'] for rung in self.rungs if rung['resources'] == fidelity] + if not rungs: + budgets = [rung['resources'] for rung in self.rungs] + raise IndexError(REGISTRATION_ERROR.format(fidelity=fidelity, budgets=budgets, + params=point)) + + rungs[0][self.hyperband.get_id(point)] = (objective, point) + + def get_candidates(self, rung_id): + """Get a candidate for promotion""" + if self.has_rung_filled(rung_id + 1): + return [] + + rung = self.rungs[rung_id]['results'] + next_rung = self.rungs[rung_id + 1]['results'] + + rung = list(sorted((objective, point) for objective, point in rung.values())) + + should_have_n_trials = self.rungs[rung_id + 1]['n_trials'] + points = [] + i = 0 + while len(points) + len(next_rung) < should_have_n_trials: + objective, point = rung[i] + assert objective is not None + _id = self.hyperband.get_id(point) + if _id not in next_rung: + points.append(point) + i += 1 + + return points + + @property + def is_done(self): + """Return True, if the last rung is filled.""" + return self.has_rung_filled(len(self.rungs) - 1) + + def has_rung_filled(self, rung_id): + """Return True, if the rung[rung_id] is filled.""" + n_trials = len(self.rungs[rung_id]['results']) + return n_trials >= self.rungs[rung_id]['n_trials'] + + def is_ready(self, rung_id=None): + """Return True, if the bracket is ready for next promote""" + if rung_id is not None: + return (self.has_rung_filled(rung_id) and + all(objective is not None + for objective, _ in self.rungs[rung_id]['results'].values())) + + is_ready = False + for _rung_id in range(len(self.rungs)): + if self.has_rung_filled(_rung_id): + is_ready = self.is_ready(_rung_id) + else: + break + + return is_ready + + def promote(self): + """Promote the first candidate that is found and return it + + The rungs are iterated over in reversed order, so that high rungs + are prioritised for promotions. When a candidate is promoted, the loop is broken and + the method returns the promoted point. + + .. note :: + + All trials are part of the rungs, for any state. Only completed trials + are eligible for promotion, i.e., only completed trials can be part of top-k. 
+ Lookup for promotion in rung l + 1 contains trials of any status. + + """ + if self.is_done: + return None + + for rung_id in range(len(self.rungs)): + if self.has_rung_filled(rung_id + 1): + continue + + if not self.is_ready(rung_id): + return None + + points = [] + for candidate in self.get_candidates(rung_id): + # pylint: disable=logging-format-interpolation + logger.debug( + 'Promoting {point} from rung {past_rung} with fidelity {past_fidelity} to ' + 'rung {new_rung} with fidelity {new_fidelity}'.format( + point=candidate, past_rung=rung_id, + past_fidelity=candidate[self.hyperband.fidelity_index], + new_rung=rung_id + 1, new_fidelity=self.rungs[rung_id + 1]['resources'])) + + candidate = list(copy.deepcopy(candidate)) + candidate[self.hyperband.fidelity_index] = self.rungs[rung_id + 1]['resources'] + points.append(tuple(candidate)) + + return points + + return None + + def seed_rng(self, seed): + """Seed the state of the random number generator. + + :param seed: Integer seed for the random number generator. + """ + self.seed = seed + + def __repr__(self): + """Return representation of bracket with fidelity levels""" + return 'Bracket(resource={}, repetition id={})' \ + .format([rung['resources'] for rung in self.rungs], self.repetition_id) diff --git a/src/orion/algo/random.py b/src/orion/algo/random.py index 0a4106164..573432c3c 100644 --- a/src/orion/algo/random.py +++ b/src/orion/algo/random.py @@ -10,7 +10,7 @@ """ import numpy -from orion.algo.base import BaseAlgorithm +from orion.algo.base import BaseAlgorithm, infer_trial_id class Random(BaseAlgorithm): @@ -35,13 +35,16 @@ def seed_rng(self, seed): @property def state_dict(self): """Return a state dict that can be used to reset the state of the algorithm.""" - return {'rng_state': self.rng.get_state()} + _state_dict = super(Random, self).state_dict + _state_dict['rng_state'] = self.rng.get_state() + return _state_dict def set_state(self, state_dict): """Reset the state of the algorithm based on the given state_dict :param state_dict: Dictionary representing state of an algorithm """ + super(Random, self).set_state(state_dict) self.seed_rng(0) self.rng.set_state(state_dict['rng_state']) @@ -54,12 +57,15 @@ def suggest(self, num=1): .. note:: New parameters must be compliant with the problem's domain `orion.algo.space.Space`. """ - return self.space.sample(num, seed=tuple(self.rng.randint(0, 1000000, size=3))) - - def observe(self, points, results): - """Observe evaluation `results` corresponding to list of `points` in - space. - - A simple random sampler though does not take anything into account. - """ - pass + points = [] + point_ids = set(self._trials_info.keys()) + i = 0 + while len(points) < num: + new_point = self.space.sample(1, seed=tuple(self.rng.randint(0, 1000000, size=3)))[0] + point_id = infer_trial_id(new_point) + if point_id not in point_ids: + point_ids.add(point_id) + points.append(new_point) + i += 1 + + return points diff --git a/src/orion/algo/space.py b/src/orion/algo/space.py index 536c76a1c..1b551e399 100644 --- a/src/orion/algo/space.py +++ b/src/orion/algo/space.py @@ -31,7 +31,7 @@ unless noted otherwise! """ - +import copy import logging import numbers @@ -200,8 +200,6 @@ def interval(self, alpha=1.0): then it will be attempted to calculate the interval from which a variable is `alpha`-likely to be drawn from. - .. note:: Lower bound is inclusive, upper bound is exclusive. 
- """ return self.prior.interval(alpha, *self._args, **self._kwargs) @@ -225,13 +223,28 @@ def __repr__(self): def get_prior_string(self): """Build the string corresponding to current prior""" - args = list(map(str, self._args[:])) - args += ["{}={}".format(k, v) for k, v in self._kwargs.items()] + args = copy.deepcopy(list(self._args[:])) + if self._prior_name == 'uniform' and len(args) == 2: + args[1] = args[0] + args[1] + args[0] = args[0] + + args = list(map(str, args)) + + for k, v in self._kwargs.items(): + if isinstance(v, str): + args += ["{}='{}'".format(k, v)] + else: + args += ["{}={}".format(k, v)] + if self._shape is not None: args += ['shape={}'.format(self._shape)] if self.default_value is not self.NO_DEFAULT_VALUE: args += ['default_value={}'.format(repr(self.default_value))] - return "{prior_name}({args})".format(prior_name=self._prior_name, args=", ".join(args)) + + prior_name = self._prior_name + if prior_name == 'reciprocal': + prior_name = 'loguniform' + return "{prior_name}({args})".format(prior_name=prior_name, args=", ".join(args)) def get_string(self): """Build the string corresponding to current dimension""" @@ -260,6 +273,11 @@ def type(self): """See `Dimension` attributes.""" return self.__class__.__name__.lower() + @property + def prior_name(self): + """Return the name of the prior""" + return self._prior_name + @property def shape(self): """Return the shape of dimension.""" @@ -274,6 +292,14 @@ def shape(self): **self._kwargs) return size + # pylint:disable=no-self-use + @property + def cardinality(self): + """Return the number of all the possible points from `Dimension`. + The default value is `numpy.inf`. + """ + return numpy.inf + def _is_numeric_array(point): """Test whether a point is numerical object or an array containing only numerical objects""" @@ -289,48 +315,48 @@ def _is_numeric(item): class Real(Dimension): - """Subclass of `Dimension` for representing real parameters. + """Search space dimension that can take on any real value. - Attributes + Parameters ---------- name : str - type : str - prior : `scipy.stats.distributions.rv_generic` - shape : tuple - See Attributes of `Dimension`. + prior : str + See Parameters of `Dimension.__init__`. + args : list + kwargs : dict + See Parameters of `Dimension.__init__` for general. + + Real kwargs (extra) + ------------------- low : float - Constrain with a lower bound (inclusive), default ``-numpy.inf``. - high : float - Constrain with an upper bound (exclusive), default ``numpy.inf``. + Lower bound (inclusive), optional; default ``-numpy.inf``. + high : float: + Upper bound (inclusive), optional; default ``numpy.inf``. + The upper bound must be inclusive because of rounding errors + during optimization which may cause values to round exactly + to the upper bound. + precision : int + Precision, optional; default ``4``. + shape : tuple + Defines how many dimensions are packed in this `Dimension`. + Describes the shape of the corresponding tensor. """ def __init__(self, name, prior, *args, **kwargs): - """Search space dimension that can take on any real value. - - Parameters - ---------- - name : str - prior : str - See Parameters of `Dimension.__init__`. - args : list - kwargs : dict - See Parameters of `Dimension.__init__` for general. - - Real kwargs (extra) - ------------------- - low : float - Lower bound (inclusive), optional; default ``-numpy.inf``. - high : float: - Upper bound (exclusive), optional; default ``numpy.inf``. 
- - """ self._low = kwargs.pop('low', -numpy.inf) self._high = kwargs.pop('high', numpy.inf) if self._high <= self._low: raise ValueError("Lower bound {} has to be less than upper bound {}" .format(self._low, self._high)) + precision = kwargs.pop('precision', 4) + if (isinstance(precision, int) and precision > 0) or precision is None: + self.precision = precision + else: + raise TypeError("Precision should be a non-negative int or None, " + "instead was {} of type {}.".format(precision, type(precision))) + super(Real, self).__init__(name, prior, *args, **kwargs) def __contains__(self, point): @@ -353,7 +379,7 @@ def __contains__(self, point): if point_.shape != self.shape: return False - return numpy.all(point_ < high) and numpy.all(point_ >= low) + return numpy.all(point_ >= low) and numpy.all(point_ <= high) def interval(self, alpha=1.0): """Return a tuple containing lower and upper bound for parameters. @@ -362,7 +388,7 @@ def interval(self, alpha=1.0): then it will be attempted to calculate the interval from which a variable is `alpha`-likely to be drawn from. - .. note:: Lower bound is inclusive, upper bound is exclusive. + .. note:: Both lower and upper bounds are inclusive. """ prior_low, prior_high = super(Real, self).interval(alpha) @@ -431,7 +457,7 @@ def interval(self, alpha=1.0): Bounds are integers. - .. note:: Lower bound is inclusive, upper bound is exclusive. + .. note:: Both lower and upper bounds are inclusive. """ low, high = super(_Discrete, self).interval(alpha) @@ -440,11 +466,9 @@ def interval(self, alpha=1.0): except OverflowError: # infinity cannot be converted to Python int type int_low = -numpy.inf try: - int_high = int(numpy.floor(high)) + int_high = int(numpy.ceil(high)) except OverflowError: # infinity cannot be converted to Python int type int_high = numpy.inf - if int_high < high: # Exclusive upper bound - int_high += 1 return (int_low, int_high) def __contains__(self, point): @@ -452,15 +476,28 @@ def __contains__(self, point): class Integer(Real, _Discrete): - """Subclass of `Dimension` for representing integer parameters. + """Search space dimension representing integer values. - Attributes + Parameters ---------- name : str - type : str - prior : `scipy.stats.distributions.rv_generic` + prior : str + See Parameters of `Dimension.__init__`. + args : list + kwargs : dict + See Parameters of `Dimension.__init__` for general. + + Real kwargs (extra) + ------------------- + low : float + Lower bound (inclusive), optional; default ``-numpy.inf``. + high : float: + Upper bound (inclusive), optional; default ``numpy.inf``. + precision : int + Precision, optional; default ``4``. shape : tuple - See Attributes of `Dimension`. + Defines how many dimensions are packed in this `Dimension`. + Describes the shape of the corresponding tensor. 
""" @@ -496,36 +533,53 @@ def cast(self, point): return casted_point + def get_prior_string(self): + """Build the string corresponding to current prior""" + prior_string = super(Integer, self).get_prior_string() + return prior_string[:-1] + ', discrete=True)' + + @property + def prior_name(self): + """Return the name of the prior""" + return 'int_{}'.format(super(Integer, self).prior_name) + + @property + def cardinality(self): + """Return the number of all the possible points from Integer `Dimension`""" + low, high = self.interval() + return _get_shape_cardinality(self.shape) * int(high - low) + + +def _get_shape_cardinality(shape): + """Get the cardinality in a shape which can be int or tuple""" + shape_cardinality = 1 + if shape is None: + return shape_cardinality + + if isinstance(shape, int): + shape = (shape, ) + + for cardinality in shape: + shape_cardinality *= cardinality + return shape_cardinality + class Categorical(Dimension): - """Subclass of `Dimension` for representing categorical parameters. + """Search space dimension that can take on categorical values. - Attributes + Parameters ---------- name : str - type : str - prior : `scipy.stats.distributions.rv_generic` - shape : tuple - See Attributes of `Dimension`. - categories : tuple - A set of unordered stuff to pick out from, except if enum + See Parameters of `Dimension.__init__`. + categories : dict or other iterable + A dictionary would associate categories to probabilities, else + it assumes to be drawn uniformly from the iterable. + kwargs : dict + See Parameters of `Dimension.__init__` for general. """ def __init__(self, name, categories, **kwargs): - """Search space dimension that can take on categorical values. - - Parameters - ---------- - name : str - See Parameters of `Dimension.__init__`. - categories : dict or other iterable - A dictionary would associate categories to probabilities, else - it assumes to be drawn uniformly from the iterable. - kwargs : dict - See Parameters of `Dimension.__init__` for general. - - """ if isinstance(categories, dict): self.categories = tuple(categories.keys()) self._probs = tuple(categories.values()) @@ -539,6 +593,11 @@ def __init__(self, name, categories, **kwargs): self._probs)) super(Categorical, self).__init__(name, prior, **kwargs) + @property + def cardinality(self): + """Return the number of all the possible values from Categorical `Dimension`""" + return len(self.categories) * _get_shape_cardinality(self._shape) + def sample(self, n_samples=1, seed=None): """Draw random samples from `prior`. @@ -552,15 +611,8 @@ def sample(self, n_samples=1, seed=None): return samples def interval(self, alpha=1.0): - """Return a tuple of possible values that this categorical dimension - can take. - - .. warning:: This method makes no sense for categorical variables. Use - ``self.categories`` instead. - - """ - raise RuntimeError("Categories have no ``interval`` (as they are not ordered).\n" - "Use ``self.categories`` instead.") + """Return a tuple of possible values that this categorical dimension can take.""" + return self.categories def __contains__(self, point): """Check if constraints hold for this `point` of `Dimension`. 
@@ -616,6 +668,11 @@ def get_prior_string(self): return 'choices({args})'.format(args=', '.join(args)) + @property + def prior_name(self): + """Return the name of the prior""" + return "choices" + def cast(self, point): """Cast a point to some category @@ -695,6 +752,14 @@ def default_value(self): """Return `high`""" return self.high + # pylint:disable=no-self-use + @property + def cardinality(self): + """Return cardinality of Fidelity dimension, leave it to 1 as Fidelity dimension + does not contribute to cardinality in a fixed way now. + """ + return 1 + def get_prior_string(self): """Build the string corresponding to current prior""" return 'fidelity({}, {}, {})'.format(self.low, self.high, self.base) @@ -705,7 +770,7 @@ def validate(self): def sample(self, n_samples=1, seed=None): """Do not do anything.""" - return [self.high] + return [self.high for i in range(n_samples)] def interval(self, alpha=1.0): """Do not do anything.""" @@ -773,11 +838,7 @@ def sample(self, n_samples=1, seed=None): return list(zip(*samples)) def interval(self, alpha=1.0): - """Return a list with the intervals for each contained dimension. - - .. note:: Lower bound is inclusive, upper bound is exclusive. - - """ + """Return a list with the intervals for each contained dimension.""" res = list() for dim in self.values(): if dim.type == 'categorical': @@ -857,6 +918,19 @@ def __iter__(self): """Return sorted keys""" return iter(sorted(super(Space, self).keys())) + @property + def configuration(self): + """Return a dictionary of priors.""" + return {name: dim.get_prior_string() for name, dim in self.items()} + + @property + def cardinality(self): + """Return the number of all all possible sets of samples in the space""" + capacities = 1 + for dim in self.values(): + capacities *= dim.cardinality + return capacities + def pack_point(point, space): """Take a list of points and pack it appropriately as a point from `space`. diff --git a/src/orion/algo/tpe.py b/src/orion/algo/tpe.py new file mode 100644 index 000000000..5a26a1d0b --- /dev/null +++ b/src/orion/algo/tpe.py @@ -0,0 +1,547 @@ +# -*- coding: utf-8 -*- +""" +:mod:`orion.algo.tpe` -- Tree-structured Parzen Estimator Approach +================================================================== + +.. module:: tpe + :platform: Unix + :synopsis: Tree-structured Parzen Estimator Approach + +""" +import logging + +import numpy +from scipy.stats import norm + +from orion.algo.base import BaseAlgorithm +from orion.core.utils.points import flatten_dims, regroup_dims + +logger = logging.getLogger(__name__) + + +def compute_max_ei_point(points, below_likelis, above_likelis): + """Compute ei among points based on their log likelihood and return the point with max ei. + + :param points: list of point with real values. + :param below_likelis: list of log likelihood for each point in the good GMM. + :param above_likelis: list of log likelihood for each point in the bad GMM. + """ + max_ei = -numpy.inf + point_index = 0 + for i, (lik_b, lik_a) in enumerate(zip(below_likelis, above_likelis)): + ei = lik_b - lik_a + if ei > max_ei: + max_ei = ei + point_index = i + return points[point_index] + + +def ramp_up_weights(total_num, flat_num, equal_weight): + """Adjust weights of observed trials. + + :param total_num: total number of observed trials. + :param flat_num: the number of the most recent trials which + get the full weight where the others will be applied with a linear ramp + from 0 to 1.0. It will only take effect if equal_weight is False. 
+ :param equal_weight: whether all the observed trails share the same weights. + """ + if total_num < flat_num or equal_weight: + return numpy.ones(total_num) + + ramp_weights = numpy.linspace(1.0 / total_num, 1.0, num=total_num - flat_num) + flat_weights = numpy.ones(flat_num) + return numpy.concatenate([ramp_weights, flat_weights]) + + +# pylint:disable=assignment-from-no-return +def adaptive_parzen_estimator(mus, low, high, + prior_weight=1.0, + equal_weight=False, + flat_num=25): + """Return the sorted mus, the corresponding sigmas and weights with adaptive kernel estimator. + + This adaptive parzen window estimator is based on the original papers and also refer the use of + prior mean in `this implementation + `_. + + :param mus: list of real values for observed mus. + :param low: real value for lower bound of points. + :param high: real value for upper bound of points. + :param prior_weight: real value for the weight of the prior mean. + :param equal_weight: bool value indicating if all points with equal weights. + :param flat_num: int value indicating the number of the most recent trials which + get the full weight where the others will be applied with a linear ramp + from 0 to 1.0. It will only take effect if equal_weight is False. + """ + mus = numpy.asarray(mus) + + prior_mu = (low + high) * 0.5 + prior_sigma = (high - low) * 1.0 + + size = len(mus) + if size > 1: + order = numpy.argsort(mus) + sorted_mus = mus[order] + prior_mu_pos = numpy.searchsorted(sorted_mus, prior_mu) + + weights = ramp_up_weights(size, flat_num, equal_weight) + + mixture_mus = numpy.zeros(size + 1) + mixture_mus[:prior_mu_pos] = sorted_mus[:prior_mu_pos] + mixture_mus[prior_mu_pos] = prior_mu + mixture_mus[prior_mu_pos + 1:] = sorted_mus[prior_mu_pos:] + + mixture_weights = numpy.ones(size + 1) + mixture_weights[:prior_mu_pos] = weights[:prior_mu_pos] + mixture_weights[prior_mu_pos] = prior_weight + mixture_weights[prior_mu_pos + 1:] = weights[prior_mu_pos:] + + sigmas = numpy.ones(size + 1) + sigmas[0] = mixture_mus[1] - mixture_mus[0] + sigmas[-1] = mixture_mus[-1] - mixture_mus[-2] + sigmas[1:-1] = numpy.maximum((mixture_mus[1:-1] - mixture_mus[0:-2]), + (mixture_mus[2:] - mixture_mus[1:-1])) + sigmas = numpy.clip(sigmas, prior_sigma / max(10, numpy.sqrt(size)), prior_sigma) + + else: + if prior_mu < mus[0]: + + mixture_mus = numpy.array([prior_mu, mus[0]]) + sigmas = numpy.array([prior_sigma, prior_sigma * 0.5]) + mixture_weights = numpy.array([prior_weight, 1.0]) + else: + mixture_mus = numpy.array([mus[0], prior_mu]) + sigmas = numpy.array([prior_sigma * 0.5, prior_sigma]) + mixture_weights = numpy.array([1.0, prior_weight]) + + weights = mixture_weights / mixture_weights.sum() + + return mixture_mus, sigmas, weights + + +class TPE(BaseAlgorithm): + """Tree-structured Parzen Estimator (TPE) algorithm is one of Sequential Model-Based + Global Optimization (SMBO) algorithms, which will build models to propose new points based + on the historical observed trials. + + Instead of modeling p(y|x) like other SMBO algorithms, TPE models p(x|y) and p(y), + and p(x|y) is modeled by transforming that generative process, replacing the distributions of + the configuration prior with non-parametric densities. + + The TPE defines p(x|y) using two such densities l(x) and g(x) while l(x) is distribution of + good points and g(x) is the distribution of bad points. 
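As a concrete check of ``ramp_up_weights`` defined above: with five observed trials and ``flat_num=3``, the three most recent trials keep full weight and the two oldest are linearly ramped:

```python
import numpy

# ramp_up_weights(total_num=5, flat_num=3, equal_weight=False), unrolled:
ramp = numpy.linspace(1.0 / 5, 1.0, num=5 - 3)  # [0.2, 1.0]
flat = numpy.ones(3)
print(numpy.concatenate([ramp, flat]))  # [0.2 1.  1.  1.  1. ]
```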
New point candidates will be sampled + with l(x) and Expected Improvement (EI) optimization scheme will be used to find the most + promising point among the candidates. + + For more information on the algorithm, see original papers at: + + - `Algorithms for Hyper-Parameter Optimization + `_ + - `Making a Science of Model Search: Hyperparameter Optimizationin Hundreds of Dimensions + for Vision Architectures `_ + + Parameters + ---------- + space: `orion.algo.space.Space` + Optimisation space with priors for each dimension. + seed: None, int or sequence of int + Seed to sample initial points and candidates points. + Default: ``None`` + n_initial_points: int + Number of initial points randomly sampled. + Default: ``20`` + n_ei_candidates: int + Number of candidates points sampled for ei compute. + Default: ``24`` + gamma: real + Ratio to split the observed trials into good and bad distributions. + Default: ``0.25`` + equal_weight: bool + True to set equal weights for observed points. + Default: ``False`` + prior_weight: int + The weight given to the prior point of the input space. + Default: ``1.0`` + full_weight_num: int + The number of the most recent trials which get the full weight where the others will be + applied with a linear ramp from 0 to 1.0. It will only take effect if equal_weight + is False. + + """ + + # pylint:disable=too-many-arguments + def __init__(self, space, seed=None, + n_initial_points=20, n_ei_candidates=24, + gamma=0.25, equal_weight=False, + prior_weight=1.0, full_weight_num=25): + + super(TPE, self).__init__(space, + seed=seed, + n_initial_points=n_initial_points, + n_ei_candidates=n_ei_candidates, + gamma=gamma, + equal_weight=equal_weight, + prior_weight=prior_weight, + full_weight_num=full_weight_num) + + for dimension in self.space.values(): + + if dimension.type != 'fidelity' and \ + dimension.prior_name not in ['uniform', 'reciprocal', 'int_uniform', 'choices']: + raise ValueError("TPE now only supports uniform, loguniform, uniform discrete " + "and choices as prior.") + + shape = dimension.shape + if shape and len(shape) != 1: + raise ValueError("TPE now only supports 1D shape.") + + if n_initial_points < 2: + n_initial_points = 2 + logger.warning('n_initial_points %s is not valid, set n_initial_points = 2', + str(n_initial_points)) + + if n_ei_candidates < 1: + n_ei_candidates = 1 + logger.warning('n_ei_candidates %s is not valid, set n_ei_candidates = 1', + str(n_ei_candidates)) + + self.seed_rng(seed) + + def seed_rng(self, seed): + """Seed the state of the random number generator. + + :param seed: Integer seed for the random number generator. + """ + self.rng = numpy.random.RandomState(seed) + + @property + def state_dict(self): + """Return a state dict that can be used to reset the state of the algorithm.""" + _state_dict = super(TPE, self).state_dict + + _state_dict['rng_state'] = self.rng.get_state() + _state_dict['seed'] = self.seed + return _state_dict + + def set_state(self, state_dict): + """Reset the state of the algorithm based on the given state_dict + + :param state_dict: Dictionary representing state of an algorithm + """ + super(TPE, self).set_state(state_dict) + + self.seed_rng(state_dict['seed']) + self.rng.set_state(state_dict['rng_state']) + + def suggest(self, num=1): + """Suggest a `num` of new sets of parameters. Randomly draw samples + from the import space and return them. + + :param num: how many sets to be suggested. + + .. note:: New parameters must be compliant with the problem's domain + `orion.algo.space.Space`. 
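Candidate selection boils down to ``compute_max_ei_point`` defined earlier: the expected improvement used here is simply the difference of log likelihoods under the good and bad models. A tiny worked example with made-up values:

```python
# EI here is log l(x) - log g(x); the candidate with the largest gap wins.
points = [0.1, 0.5, 0.9]
below_likelis = [-0.2, -0.1, -1.5]   # log likelihood under the "good" model
above_likelis = [-0.3, -2.0, -0.4]   # log likelihood under the "bad" model
eis = [b - a for b, a in zip(below_likelis, above_likelis)]  # [0.1, 1.9, -1.1]
print(points[eis.index(max(eis))])   # 0.5
```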
+ """ + if num > 1: + raise ValueError("TPE should suggest only one point.") + + samples = [] + if len(self._trials_info) < self.n_initial_points: + new_point = self.space.sample(1, seed=tuple(self.rng.randint(0, 1000000, size=3)))[0] + samples.append(new_point) + else: + point = [] + below_points, above_points = self.split_trials() + + below_points = [flatten_dims(point, self.space) for point in below_points] + above_points = [flatten_dims(point, self.space) for point in above_points] + below_points = list(map(list, zip(*below_points))) + above_points = list(map(list, zip(*above_points))) + + idx = 0 + for dimension in self.space.values(): + + shape = dimension.shape + if not shape: + shape = (1,) + + if dimension.type == 'real': + points = self._sample_real_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]]) + elif dimension.type == 'integer' and dimension.prior_name == 'int_uniform': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_int_point) + elif dimension.type == 'categorical' and dimension.prior_name == 'choices': + points = self.sample_one_dimension(dimension, shape[0], + below_points[idx: idx + shape[0]], + above_points[idx: idx + shape[0]], + self._sample_categorical_point) + elif dimension.type == 'fidelity': + # fidelity dimension + points = dimension.sample(num) + else: + raise NotImplementedError() + + if len(points) < shape[0]: + logger.warning('TPE failed to sample new point with configuration %s', + self.configuration) + return None + + idx += shape[0] + point += points + + point = regroup_dims(point, self.space) + samples.append(point) + + return samples + + # pylint:disable=no-self-use + def sample_one_dimension(self, dimension, shape_size, below_points, above_points, sampler): + """Sample values for a dimension + + :param dimension: Dimension. + :param shape_size: 1D Shape Size of the Real Dimension. + :param below_points: good points with shape (m, n), m=shape_size. + :param above_points: bad points with shape (m, n), m=shape_size. + :param sampler: method to sample one value for upon the dimension. 
+ """ + points = [] + + for j in range(shape_size): + new_point = sampler(dimension, below_points[j], above_points[j]) + if new_point is not None: + points.append(new_point) + + return points + + def _sample_real_dimension(self, dimension, shape_size, below_points, above_points): + """Sample values for real dimension""" + if dimension.prior_name == 'uniform': + return self.sample_one_dimension(dimension, shape_size, below_points, above_points, + self._sample_real_point) + elif dimension.prior_name == 'reciprocal': + return self.sample_one_dimension(dimension, shape_size, below_points, above_points, + self._sample_loguniform_real_point) + else: + raise NotImplementedError() + + def _sample_loguniform_real_point(self, dimension, below_points, above_points): + """Sample one value for real dimension in a loguniform way""" + return self._sample_real_point(dimension, below_points, above_points, is_log=True) + + def _sample_real_point(self, dimension, below_points, above_points, is_log=False): + """Sample one value for real dimension based on the observed good and bad points""" + low, high = dimension.interval() + if is_log: + low = numpy.log(low) + high = numpy.log(high) + below_points = numpy.log(below_points) + above_points = numpy.log(above_points) + + below_mus, below_sigmas, below_weights = \ + adaptive_parzen_estimator(below_points, low, high, self.prior_weight, + self.equal_weight, flat_num=self.full_weight_num) + above_mus, above_sigmas, above_weights = \ + adaptive_parzen_estimator(above_points, low, high, self.prior_weight, + self.equal_weight, flat_num=self.full_weight_num) + + gmm_sampler_below = GMMSampler(self, below_mus, below_sigmas, + low, high, below_weights) + gmm_sampler_above = GMMSampler(self, above_mus, above_sigmas, + low, high, above_weights) + + candidate_points = gmm_sampler_below.sample(self.n_ei_candidates) + if candidate_points: + lik_blow = gmm_sampler_below.get_loglikelis(candidate_points) + lik_above = gmm_sampler_above.get_loglikelis(candidate_points) + new_point = compute_max_ei_point(candidate_points, lik_blow, lik_above) + + if is_log: + new_point = numpy.exp(new_point) + + return new_point + + return None + + def _sample_int_point(self, dimension, below_points, above_points): + """Sample one value for integer dimension based on the observed good and bad points""" + low, high = dimension.interval() + choices = range(low, high) + + below_points = numpy.array(below_points).astype(int) - low + above_points = numpy.array(above_points).astype(int) - low + + sampler_below = CategoricalSampler(self, below_points, choices) + candidate_points = sampler_below.sample(self.n_ei_candidates) + + if list(candidate_points): + sampler_above = CategoricalSampler(self, above_points, choices) + + lik_below = sampler_below.get_loglikelis(candidate_points) + lik_above = sampler_above.get_loglikelis(candidate_points) + + new_point = compute_max_ei_point(candidate_points, lik_below, lik_above) + new_point = new_point + low + return new_point + + return None + + def _sample_categorical_point(self, dimension, below_points, above_points): + """Sample one value for categorical dimension based on the observed good and bad points""" + choices = dimension.interval() + + below_points = [choices.index(point) for point in below_points] + above_points = [choices.index(point) for point in above_points] + + sampler_below = CategoricalSampler(self, below_points, choices) + candidate_points = sampler_below.sample(self.n_ei_candidates) + + if list(candidate_points): + sampler_above = 
CategoricalSampler(self, above_points, choices) + + lik_below = sampler_below.get_loglikelis(candidate_points) + lik_above = sampler_above.get_loglikelis(candidate_points) + + new_point_index = compute_max_ei_point(candidate_points, lik_below, lik_above) + new_point = choices[new_point_index] + + return new_point + return None + + def split_trials(self): + """Split the observed trials into good and bad ones based on the ratio `gamma``""" + sorted_trials = sorted(self._trials_info.values(), key=lambda x: x[1]['objective']) + sorted_points = [list(points) for points, results in sorted_trials] + + split_index = int(numpy.ceil(self.gamma * len(sorted_points))) + + below = sorted_points[:split_index] + above = sorted_points[split_index:] + + return below, above + + def observe(self, points, results): + """Observe evaluation `results` corresponding to list of `points` in + space. + + A simple random sampler though does not take anything into account. + """ + super(TPE, self).observe(points, results) + + +class GMMSampler(): + """Gaussian Mixture Model Sampler for TPE algorithm + + Parameters + ---------- + tpe: `TPE` algorithm + The tpe algorithm object which this sampler will be part of. + mus: list + mus for each Gaussian components in the GMM. + Default: ``None`` + sigmas: list + sigmas for each Gaussian components in the GMM. + low: real + Lower bound of the sampled points. + high: real + Upper bound of the sampled points. + weights: list + Weights for each Gaussian components in the GMM + Default: ``None`` + + """ + + def __init__(self, tpe, mus, sigmas, low, high, weights=None): + self.tpe = tpe + + self.mus = mus + self.sigmas = sigmas + self.low = low + self.high = high + self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)] + + self.pdfs = [] + self._build_mixture() + + def _build_mixture(self): + """Build the Gaussian components in the GMM""" + for mu, sigma in zip(self.mus, self.sigmas): + self.pdfs.append(norm(mu, sigma)) + + def sample(self, num=1): + """Sample required number of points""" + point = [] + for _ in range(num): + pdf = numpy.argmax(self.tpe.rng.multinomial(1, self.weights)) + new_points = self.pdfs[pdf].rvs(size=5) + for pt in new_points: + if self.low <= pt < self.high: + point.append(pt) + break + + return point + + def get_loglikelis(self, points): + """Return the log likelihood for the points""" + points = numpy.array(points) + weight_likelis = [numpy.log(self.weights[i] * pdf.pdf(points)) + for i, pdf in enumerate(self.pdfs)] + weight_likelis = numpy.array(weight_likelis) + # (num_weights, num_points) => (num_points, num_weights) + weight_likelis = weight_likelis.transpose() + + # log-sum-exp trick + max_likeli = numpy.nanmax(weight_likelis, axis=1) + point_likeli = max_likeli + numpy.log(numpy.nansum + (numpy.exp(weight_likelis - max_likeli[:, None]), + axis=1)) + + return point_likeli + + +class CategoricalSampler(): + """Categorical Sampler for discrete integer and categorical choices + + Parameters + ---------- + tpe: `TPE` algorithm + The tpe algorithm object which this sampler will be part of. 
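``GMMSampler.get_loglikelis`` above relies on the log-sum-exp trick: ``log(sum(exp(a)))`` is computed as ``max(a) + log(sum(exp(a - max(a))))`` so that very negative log likelihoods do not underflow. A minimal demonstration:

```python
import numpy

# log(sum_i exp(a_i)) computed stably, as in GMMSampler.get_loglikelis:
log_terms = numpy.array([-1000.0, -1001.0])    # naive exp() underflows to 0
naive = numpy.log(numpy.exp(log_terms).sum())  # -inf (with a runtime warning)
stable = log_terms.max() + numpy.log(numpy.exp(log_terms - log_terms.max()).sum())
print(naive, stable)  # -inf, approximately -999.69
```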
+ observations: list + Observed values in the dimension + choices: list + Candidate values for the dimension + + """ + + def __init__(self, tpe, observations, choices): + self.tpe = tpe + self.obs = observations + self.choices = choices + + self._build_multinomial_weights() + + def _build_multinomial_weights(self): + """Build weights for categorical distribution based on observations""" + weights_obs = ramp_up_weights(len(self.obs), + self.tpe.full_weight_num, self.tpe.equal_weight) + counts_obs = numpy.bincount(self.obs, minlength=len(self.choices), weights=weights_obs) + counts_obs = counts_obs + self.tpe.prior_weight + self.weights = counts_obs / counts_obs.sum() + + def sample(self, num=1): + """Sample required number of points""" + samples = self.tpe.rng.multinomial(n=1, pvals=self.weights, size=num) + + assert samples.shape == (num,) + (len(self.weights),) + + samples_index = samples.argmax(-1) + assert samples_index.shape == (num,) + + return samples_index + + def get_loglikelis(self, points): + """Return the log likelihood for the points""" + return numpy.log(numpy.asarray(self.weights)[points]) diff --git a/src/orion/client/__init__.py b/src/orion/client/__init__.py index b7cf5ee44..8ec578a09 100644 --- a/src/orion/client/__init__.py +++ b/src/orion/client/__init__.py @@ -1,48 +1,242 @@ # -*- coding: utf-8 -*- """ -:mod:`orion.client` -- Helper function for returning results from script -========================================================================== +:mod:`orion.client` -- Python API +================================= .. module:: client :platform: Unix :synopsis: Provides functions for communicating with `orion.core`. """ -import os +from orion.client.cli import ( + interrupt_trial, report_bad_trial, report_objective, report_results) +from orion.client.experiment import ExperimentClient +import orion.core.io.experiment_builder as experiment_builder +from orion.core.utils.exceptions import RaceCondition +from orion.core.utils.tests import update_singletons +from orion.core.worker.producer import Producer +from orion.storage.base import setup_storage -IS_ORION_ON = False -_HAS_REPORTED_RESULTS = False -RESULTS_FILENAME = os.getenv('ORION_RESULTS_PATH', None) -if RESULTS_FILENAME and os.path.isfile(RESULTS_FILENAME): - import json - IS_ORION_ON = True -if RESULTS_FILENAME and not IS_ORION_ON: - raise RuntimeWarning("Results file path provided in environmental variable " - "does not correspond to an existing file.") +__all__ = ['interrupt_trial', 'report_bad_trial', 'report_objective', 'report_results', + 'create_experiment', 'workon'] -def report_results(data): - """Facilitate the reporting of results for a user's script acting as a - black-box computation. +# pylint: disable=too-many-arguments +def create_experiment( + name, version=None, space=None, algorithms=None, + strategy=None, max_trials=None, storage=None, branching=None, + max_idle_time=None, heartbeat=None, working_dir=None, debug=False): + """Create an experiment - :param data: A dictionary containing experimental results + There is 2 main scenarios - .. note:: To be called only once in order to report a final evaluation - of a particular trial. + 1) The experiment is new - .. note:: In case that user's script is not running in a orion's context, - this function will act as a Python `print` function. + ``name`` and ``space`` arguments are required, otherwise ``NoConfigurationError`` will be + raised. - .. note:: For your own good, this can be called **only once**. 
+    All other arguments (``algorithms``, ``strategy``, ``max_trials``, ``storage``, ``branching``
+    and ``working_dir``) will be replaced by the system's defaults if omitted. The system's
+    defaults can also be overridden in the global configuration file, as described for the
+    database in :ref:`Database Configuration`. We do not recommend overriding the algorithm
+    configuration using the system's defaults, but overriding the storage configuration can be
+    very convenient if the same storage is used for all your experiments.
+
+    2) The experiment exists in the database.
+
+    We can break down this scenario into two sub-scenarios for clarity.
+
+    2.1) Only the experiment name is given.
+
+    The configuration will be fetched from the database.
+
+    2.2) Arguments other than the name are given.
+
+    The configuration will be fetched from the database and the given arguments will override it.
+    ``max_trials`` may be overwritten in the database, but any other change will lead to a
+    branching. Instead of modifying the experiment ``(name, version)``, it will create a new
+    experiment ``(name, version+1)`` which will have the same configuration as ``(name, version)``
+    except for the differing arguments given by the user. This new experiment will have access to
+    trials of ``(name, version)``, adapted according to the differences between ``version`` and
+    ``version+1``. A previous version can be accessed by specifying the ``version`` argument.
+
+    Causes of experiment branching are:
+
+    - Change of search space
+
+      - New dimension
+
+      - Different prior
+
+      - Missing dimension
+
+    - Change of algorithm
+
+    - Change of strategy (Not implemented yet)
+
+    - Change of code version (Only supported by commandline API for now)
+
+    Parameters
+    ----------
+    name: str
+        Name of the experiment
+    version: int, optional
+        Version of the experiment. Defaults to last existing version for a given ``name``
+        or 1 for new experiment.
+    space: dict, optional
+        Optimization space of the algorithm. Should have the form ``dict(name='<prior>(args)')``.
+    algorithms: str or dict, optional
+        Algorithm used for optimization.
+    strategy: str or dict, optional
+        Parallel strategy to use to parallelize the algorithm.
+    max_trials: int, optional
+        Maximum number of trials before the experiment is considered done.
+    storage: dict, optional
+        Configuration of the storage backend.
+    working_dir: str, optional
+        Working directory created for the experiment inside which a unique folder will be created
+        for each trial. Defaults to a temporary directory that is deleted at end of execution.
+    max_idle_time: int, optional
+        Maximum time the producer can spend trying to generate a new suggestion.
+        Such timeouts are generally caused by a slow database, a large number of
+        concurrent workers leading to many race conditions, or small search spaces
+        with integer/categorical dimensions that may be fully explored.
+        Defaults to `orion.core.config.worker.max_idle_time`.
+    heartbeat: int, optional
+        Frequency (seconds) at which the heartbeat of the trial is updated.
+        If the heartbeat of a `reserved` trial is larger than twice the configured
+        heartbeat, Oríon will reset the status of the trial to `interrupted`.
+        This allows restoring lost trials (ex: due to a killed worker).
+        Defaults to `orion.core.config.worker.heartbeat`.
+    debug: bool, optional
+        If True, the storage configuration is overridden with `legacy:EphemeralDB`.
+        Defaults to False.
+    branching: dict, optional
+        Arguments to control the branching.
+
+        branch_to: str, optional
+            Name of the experiment to branch to.
The parent experiment will be the one specified by
+            ``(name, version)``, and the child will be ``(branch_to, 1)``.
+        branch_from: str, optional
+            Name of the experiment to branch from.
+            The parent experiment will be the one specified by
+            ``(branch_from, last version)``, and the child will be ``(name, 1)``.
+        manual_resolution: bool, optional
+            Starts the prompt to resolve the conflicts manually. Defaults to False.
+        algorithm_change: bool, optional
+            Whether to automatically solve the algorithm conflict (change of algo config).
+            Defaults to True.
+        code_change_type: str, optional
+            How to resolve code changes automatically. Must be one of 'noeffect', 'unsure' or
+            'break'. Defaults to 'break'.
+        cli_change_type: str, optional
+            How to resolve cli changes automatically. Must be one of 'noeffect', 'unsure' or
+            'break'. Defaults to 'break'.
+        config_change_type: str, optional
+            How to resolve config changes automatically. Must be one of 'noeffect', 'unsure' or
+            'break'. Defaults to 'break'.
+
+    Raises
+    ------
+    `orion.core.utils.SingletonAlreadyInstantiatedError`
+        If the storage is already instantiated and the given configuration is different.
+        Storage is a singleton; you may only use one instance per process.
+    `orion.core.utils.exceptions.NoConfigurationError`
+        The experiment is not in the database and no space is provided by the user.
+    `orion.core.utils.exceptions.RaceCondition`
+        There was a race condition during branching and the new version cannot be inferred
+        because of that. Single race conditions are normally handled seamlessly. If this error
+        gets raised, it means that different modifications occurred during each race condition
+        resolution. This is likely due to quick code changes during experiment creation. Make
+        sure your script is not generating files within your code repository.
+    `orion.core.utils.exceptions.BranchingEvent`
+        The configuration is different from the corresponding one in the database and the
+        branching cannot be solved automatically. This usually happens if version=x is specified
+        but the experiment ``(name, x)`` already has a child ``(name, x+1)``. If you really need
+        to branch from version ``x``, give it a new name to branch to with
+        ``branching={'branch_to': }``.
+    `NotImplementedError`
+        If the algorithm, storage or strategy specified is not properly installed.
+
+    """
+    setup_storage(storage=storage, debug=debug)
+
+    try:
+        experiment = experiment_builder.build(
+            name, version=version, space=space, algorithms=algorithms,
+            strategy=strategy, max_trials=max_trials, branching=branching,
+            working_dir=working_dir)
+    except RaceCondition:
+        # Try again, but if it fails again, raise. Race conditions due to version increment
+        # should only occur once in a short window of time unless the code version is changing
+        # at a crazy pace.
+        try:
+            experiment = experiment_builder.build(
+                name, version=version, space=space, algorithms=algorithms,
+                strategy=strategy, max_trials=max_trials, branching=branching,
+                working_dir=working_dir)
+        except RaceCondition as e:
+            raise RaceCondition(
+                "There was a race condition during branching and the new version cannot be "
+                "inferred because of that. Single race conditions are normally handled "
+                "seamlessly. If this error gets raised, it means that different modifications "
+                "occurred during each race condition resolution. This is likely due to quick "
+                "code changes during experiment creation. 
Make sure your script is not generating files within your code "
+                "repository.") from e
+
+    producer = Producer(experiment, max_idle_time)
+
+    return ExperimentClient(experiment, producer, heartbeat)
+
+
+def workon(function, space, name='loop', algorithms=None, max_trials=None):
+    """Optimize a function over a given search space
+
+    This will create a new experiment with an in-memory storage and optimize the given function
+    until `max_trials` is reached or the `algorithm` is done
+    (some algorithms like random search are never done).
+
+    For information on how to fetch results, see
+    :py:class:`orion.client.experiment.ExperimentClient`.
+
+    .. note::
+
+        Each call to this function will create a separate in-memory storage.
+
+    Parameters
+    ----------
+    function: callable
+        Function to optimize. Must take arguments provided by trial.params and return the
+        results to report to Oríon.
+    space: dict
+        Optimization space of the algorithm. Should have the form ``dict(name='<prior>(args)')``.
+    name: str, optional
+        Name of the experiment. Defaults to 'loop'.
+    algorithms: str or dict, optional
+        Algorithm used for optimization.
+    max_trials: int, optional
+        Maximum number of trials before the experiment is considered done.
+
+    Raises
+    ------
+    `NotImplementedError`
+        If the algorithm specified is not properly installed.
+
+    """
-    global _HAS_REPORTED_RESULTS  # pylint:disable=global-statement
-    if _HAS_REPORTED_RESULTS:
-        raise RuntimeWarning("Has already reported evaluation results once.")
-    if IS_ORION_ON:
-        with open(RESULTS_FILENAME, 'w') as results_file:
-            json.dump(data, results_file)
-    else:
-        print(data)
-    _HAS_REPORTED_RESULTS = True
+    # Clear singletons and keep pointers to restore them.
+    singletons = update_singletons()
+
+    setup_storage(storage={'type': 'legacy', 'database': {'type': 'EphemeralDB'}})
+
+    experiment = experiment_builder.build(
+        name, version=1, space=space, algorithms=algorithms,
+        strategy='NoParallelStrategy', max_trials=max_trials)
+
+    producer = Producer(experiment)
+
+    experiment_client = ExperimentClient(experiment, producer)
+    experiment_client.workon(function, max_trials=max_trials)
+
+    # Restore singletons
+    update_singletons(singletons)
+
+    return experiment_client
diff --git a/src/orion/client/cli.py b/src/orion/client/cli.py
new file mode 100644
index 000000000..4ae6af3e6
--- /dev/null
+++ b/src/orion/client/cli.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+"""
+:mod:`orion.client.cli` -- Helper functions for returning results from scripts
+===============================================================================
+
+.. module:: cli
+   :platform: Unix
+   :synopsis: Provides functions for communicating with `orion.core`.
+
+"""
+import os
+import sys
+
+from orion.core import config
+
+
+IS_ORION_ON = False
+_HAS_REPORTED_RESULTS = False
+RESULTS_FILENAME = os.getenv('ORION_RESULTS_PATH', None)
+if RESULTS_FILENAME and os.path.isfile(RESULTS_FILENAME):
+    import json
+    IS_ORION_ON = True
+
+if RESULTS_FILENAME and not IS_ORION_ON:
+    raise RuntimeWarning("Results file path provided in environmental variable "
+                         "does not correspond to an existing file.")
+
+
+def interrupt_trial():
+    """Send an interrupt signal to the Oríon worker."""
+    sys.exit(config.worker.interrupt_signal_code)
+
+
+def report_objective(objective, name='objective'):
+    """Report only the objective at the end of execution
+
+    To send more data (statistic, constraint, gradient), use ``report_results``.
+
+    .. warning::
+
+        To be called only once in order to report a final evaluation of a particular trial.
+
+    .. warning::
+
+        Oríon is only minimizing. Make sure to report a metric that you seek to minimize.
+
+    .. note::
+
+        If the user's script is not running in an Oríon context,
+        this function will act like the Python `print` function.
+
+    Parameters
+    ----------
+    objective: float
+        Objective to return to Oríon for the current trial.
+    name: str, optional
+        Name of the objective. Default is 'objective'.
+
+    """
+    report_results([dict(name=name, type='objective', value=objective)])
+
+
+def report_bad_trial(objective=1e10, name='objective', data=None):
+    """Report a bad trial with a large objective to Oríon.
+
+    This is especially useful if some parameter values lead to exceptions such as out of memory.
+    Reporting a large objective from such trials will push algorithms towards valid
+    configurations.
+
+    .. warning::
+
+        To be called only once in order to report a final evaluation of a particular trial.
+
+    .. warning::
+
+        Oríon is only minimizing. Make sure to report a metric that you seek to minimize.
+
+    .. note::
+
+        If the user's script is not running in an Oríon context,
+        this function will act like the Python `print` function.
+
+    Parameters
+    ----------
+    objective: float
+        Objective to return to Oríon for the current trial. The default objective is 1e10.
+        This may not be valid for some metrics and the value should be overridden accordingly.
+        In the case of error rates for instance, the value should be 1.0.
+    name: str, optional
+        Name of the objective. Default is 'objective'.
+    data: list of dict, optional
+        A list of dictionaries representing the results in the form
+        dict(name=result_name, type='statistic', value=0). The types supported are
+        'constraint', 'gradient' and 'statistic'.
+
+    """
+    if data is None:
+        data = []
+    report_results([dict(name=name, type='objective', value=objective)] + data)
+
+
+def report_results(data):
+    """Facilitate the reporting of results for a user's script acting as a
+    black-box computation.
+
+    .. warning::
+
+        To be called only once in order to report a final evaluation of a particular trial.
+
+    .. warning::
+
+        Oríon is only minimizing. Make sure to report a metric that you seek to minimize.
+
+    .. note::
+
+        If the user's script is not running in an Oríon context,
+        this function will act like the Python `print` function.
+
+    Parameters
+    ----------
+    data: list of dict
+        A list of dictionaries representing the results in the form
+        dict(name=result_name, type='statistic', value=0). The types supported are
+        'objective', 'constraint', 'gradient' and 'statistic'. The list should contain at
+        least one 'objective', which is the metric the algorithm will be minimizing.
+
+    """
+    global _HAS_REPORTED_RESULTS  # pylint:disable=global-statement
+    if _HAS_REPORTED_RESULTS:
+        raise RuntimeWarning("Has already reported evaluation results once.")
+    if IS_ORION_ON:
+        with open(RESULTS_FILENAME, 'w') as results_file:
+            json.dump(data, results_file)
+    else:
+        print(data)
+    _HAS_REPORTED_RESULTS = True
diff --git a/src/orion/client/experiment.py b/src/orion/client/experiment.py
new file mode 100644
index 000000000..054c25166
--- /dev/null
+++ b/src/orion/client/experiment.py
@@ -0,0 +1,546 @@
+# -*- coding: utf-8 -*-
+"""
+:mod:`orion.client.experiment` -- Experiment wrapper client
+===========================================================
+
+.. 
module:: experiment + :platform: Unix + :synopsis: Wraps the core Experiment object to provide further functionalities for the user + +""" +import atexit +import functools +import logging +import sys + +from numpy import inf as infinity + +from orion.core.io.database import DuplicateKeyError +from orion.core.utils.exceptions import BrokenExperiment, SampleTimeout, WaitingForTrials +from orion.core.utils.flatten import flatten, unflatten +import orion.core.utils.format_trials as format_trials +import orion.core.worker +from orion.core.worker.trial import Trial +from orion.core.worker.trial_pacemaker import TrialPacemaker +from orion.storage.base import FailedUpdate + + +log = logging.getLogger(__name__) + + +def set_broken_trials(client): + """Release all trials with status broken if the process exits without releasing them.""" + if sys.exc_info()[0] is KeyboardInterrupt: + status = 'interrupted' + else: + status = 'broken' + + for trial_id in list(client._pacemakers.keys()): # pylint: disable=protected-access + trial = client.get_trial(uid=trial_id) + if trial is None: + log.warning('Trial {} was not found in storage, could not set status to `broken`.') + continue + client.release(trial, status=status) + + +# pylint: disable=too-many-public-methods +class ExperimentClient: + """ExperimentClient providing all functionalities for the python API + + Note that the ExperimentClient is not meant to be instantiated by the user. + Look at `orion.client.create_experiment` to build an ExperimentClient. + + Parameters + ---------- + experiment: `orion.core.worker.experiment.Experiment` + Experiment object serving for interaction with storage + producer: `orion.core.worker.producer.Producer` + Producer object used to produce new trials. + + """ + + def __init__(self, experiment, producer, heartbeat=None): + self._experiment = experiment + self._producer = producer + self._pacemakers = {} + self.set_broken_trials = functools.partial(set_broken_trials, client=self) + if heartbeat is None: + heartbeat = orion.core.config.worker.heartbeat + self.heartbeat = heartbeat + atexit.register(self.set_broken_trials) + + ### + # Attributes + ### + + @property + def name(self): + """Return the name of the experiment in the database.""" + return self._experiment.name + + # pylint: disable=invalid-name + @property + def id(self): + """Return the id of the experiment in the database.""" + return self._experiment.id + + @property + def version(self): + """Version of the experiment.""" + return self._experiment.version + + @property + def max_trials(self): + """Max-trials to execute before stopping the experiment.""" + return self._experiment.max_trials + + @property + def metadata(self): + """Metadata of the experiment.""" + return self._experiment.metadata + + @property + def space(self): + """Return problem's parameter `orion.algo.space.Space`.""" + return self._experiment.space + + @property + def algorithms(self): + """Algorithms of the experiment.""" + return self._experiment.algorithms + + @property + def is_done(self): + """Return True, if this experiment is considered to be finished. + + 1. Count how many trials have been completed and compare with `max_trials`. + 2. Ask `algorithms` if they consider there is a chance for further improvement. + """ + return self._experiment.is_done + + @property + def is_broken(self): + """Return True, if this experiment is considered to be broken. + + Count how many trials are broken and return True if that number has reached + as given threshold. 
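Putting the properties documented above together, a minimal illustrative loop that drives an experiment by hand with ``suggest``/``observe``; the experiment name and space are made up, and ``debug=True`` swaps in the in-memory storage:

```python
from orion.client import create_experiment

client = create_experiment(
    'demo', space={'x': 'uniform(-5, 5)'}, max_trials=10, debug=True)

while not client.is_done and not client.is_broken:
    trial = client.suggest()  # may raise WaitingForTrials or SampleTimeout
    if trial is None:
        break
    objective = (trial.params['x'] - 1) ** 2
    client.observe(
        trial, [dict(name='objective', type='objective', value=objective)])
```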
+ """ + return self._experiment.is_broken + + @property + def configuration(self): + """Return a copy of an `Experiment` configuration as a dictionary.""" + return self._experiment.configuration + + @property + def stats(self): + """Calculate a stats dictionary for this particular experiment. + + Returns + ------- + stats : dict + + Stats + ----- + trials_completed : int + Number of completed trials + best_trials_id : int + Unique identifier of the `Trial` object in the database which achieved + the best known objective result. + best_evaluation : float + Evaluation score of the best trial + start_time : `datetime.datetime` + When Experiment was first dispatched and started running. + finish_time : `datetime.datetime` + When Experiment reached terminating condition and stopped running. + duration : `datetime.timedelta` + Elapsed time. + + """ + return self._experiment.stats + + @property + def node(self): + """Node of the experiment in the version control tree.""" + return self._experiment.node + + @property + def working_dir(self): + """Working directory of the experiment.""" + return self._experiment.working_dir + + @property + def producer(self): + """Return the producer configuration of the experiment.""" + return self._experiment.producer + + ### + # Queries + ### + + def fetch_trials(self, with_evc_tree=False): + """Fetch all trials of the experiment + + Parameters + ---------- + with_evc_tree: bool, optional + Fetch all trials from the EVC tree. + Default: False + + """ + return self._experiment.fetch_trials(with_evc_tree=with_evc_tree) + + def get_trial(self, trial=None, uid=None): + """Fetch a single trial + + Parameters + ---------- + trial: Trial, optional + trial object to retrieve from the database + + uid: str, optional + trial id used to retrieve the trial object + + Returns + ------- + return none if the trial is not found, + + Raises + ------ + UndefinedCall + if both trial and uid are not set + + AssertionError + if both trial and uid are provided and they do not match + + """ + return self._experiment.get_trial(trial, uid) + + def fetch_trials_by_status(self, status, with_evc_tree=False): + """Fetch all trials with the given status + + Trials are sorted based on `Trial.submit_time` + + :return: list of `Trial` objects + """ + return self._experiment.fetch_trials_by_status(status, with_evc_tree=with_evc_tree) + + def fetch_noncompleted_trials(self, with_evc_tree=False): + """Fetch non-completed trials of this `Experiment` instance. + + Trials are sorted based on `Trial.submit_time` + + .. note:: + + It will return all non-completed trials, including new, reserved, suspended, + interrupted and broken ones. + + :return: list of non-completed `Trial` objects + """ + return self._experiment.fetch_noncompleted_trials(with_evc_tree=with_evc_tree) + + ### + # Actions + ### + + # pylint: disable=unused-argument + def insert(self, params, results=None, reserve=False): + """Insert a new trial. + + Parameters + ---------- + params: dict + Parameters of the new trial to add to the database. These parameters + must comply with the space definition otherwise a ValueError will be raised. + results: list, optional + Results to be set for the new trial. Results must have the format + {name: : type: <'objective', 'constraint' or 'gradient'>, value=} otherwise + a ValueError will be raised. + Note that passing results will mark the trial as completed and therefore cannot be + reserved. The returned trial will have status 'completed'. 
+ If the results are invalid, the trial will still be inserted but reservation will be + released. + reserve: bool, optional + If reserve=True, the inserted trial will be reserved. `reserve` cannot be True if + `results` are given. + Defaults to False. + + Returns + ------- + `orion.core.worker.trial.Trial` + The trial inserted in storage. If `reserve=True` and no results are given, the returned + trial will be in a `reserved` status. + + Raises + ------ + `ValueError` + - If results are given and reserve=True + - If params have invalid format + - If results have invalid format + `orion.core.io.database.DuplicateKeyError` + - If a trial with identical params already exist for the current experiment. + + """ + if results and reserve: + raise ValueError( + 'Cannot observe a trial and reserve it. A trial with results has status ' + '`completed` and cannot be reserved.') + trial = format_trials.dict_to_trial(params, self.space) + try: + self._experiment.register_trial(trial, status='reserved') + self._maintain_reservation(trial) + except DuplicateKeyError as e: + message = 'A trial with params {} already exist for experiment {}-v{}'.format( + params, self.name, self.version) + raise DuplicateKeyError(message) from e + + if results: + try: + self.observe(trial, results) + except ValueError: + self._release_reservation(trial) + raise + + return trial + + if not reserve: + self.release(trial) + + return trial + + def reserve(self, trial): + """Reserve a trial. + + Set a trial status to reserve to ensure that concurrent process cannot work on it. + Trials can only be reserved with status 'new', 'interrupted' or 'suspended'. + + Parameters + ---------- + trial: `orion.core.worker.trial.Trial` + Trial to reserve. + + Raises + ------ + `RuntimeError` + If trial is reserved by another process + `ValueError` + If the trial does not exist in storage. + + Notes + ----- + When reserved, a `TrialPacemaker` is started to update an heartbeat in storage. The + frequency of the heartbeat is configurable at creation of experiment + or with `orion.core.config.worker.heartbeat`. + If the process terminates unexpectedly, the heartbeat will cease and remote processes + may reset the status of the trial to 'interrupted' when the heartbeat has not been updated + since twice the value of `heartbeat`. + + """ + if trial.status == 'reserved' and trial.id in self._pacemakers: + log.warning('Trial %s is already reserved.', trial.id) + return + elif trial.status == 'reserved' and trial.id not in self._pacemakers: + raise RuntimeError('Trial {} is already reserved by another process.'.format(trial.id)) + try: + self._experiment.set_trial_status(trial, 'reserved', heartbeat=self.heartbeat) + except FailedUpdate as e: + if self.get_trial(trial) is None: + raise ValueError('Trial {} does not exist in database.'.format(trial.id)) from e + raise RuntimeError('Could not reserve trial {}.'.format(trial.id)) from e + + self._maintain_reservation(trial) + + def release(self, trial, status='interrupted'): + """Release a trial. + + Release the reservation and stop the heartbeat. + + Parameters + ---------- + trial: `orion.core.worker.trial.Trial` + Trial to reserve. + status: str, optional + Set the trial to given status while releasing the reservation. + Defaults to 'interrupted'. + + Raises + ------ + `RuntimeError` + If reservation of the trial has been lost prior to releasing it. + `ValueError` + If the trial does not exist in storage. 
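A short sketch of the reserve/observe/release cycle described above, releasing with an explicit status when the evaluation fails; ``run_user_script`` is a hypothetical stand-in for the user's code, and ``client`` is as in the earlier sketch:

```python
trial = client.suggest()  # the returned trial is already reserved
try:
    score = run_user_script(**trial.params)  # hypothetical user function
    # observe() releases the trial with status 'completed' on success
    client.observe(
        trial, [dict(name='objective', type='objective', value=score)])
except Exception:
    client.release(trial, status='broken')
    raise
```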
+ + """ + try: + self._experiment.set_trial_status(trial, status) + except FailedUpdate as e: + if self.get_trial(trial) is None: + raise ValueError('Trial {} does not exist in database.'.format(trial.id)) from e + raise RuntimeError( + 'Reservation for trial {} has been lost before release.'.format(trial.id)) from e + finally: + self._release_reservation(trial) + + def suggest(self): + """Suggest a trial to execute. + + If any trial is available (new or interrupted), it selects one and reserves it. + Otherwise, the algorithm is used to generate a new trial that is registered in storage and + reserved. + + Returns + ------- + `orior.core.worker.trial.Trial` or None + Reserved trial for execution. Will return None if experiment is done. + of if the algorithm cannot suggest until other trials complete. + + Raises + ------ + `WaitingForTrials` + if the experiment is not completed and algorithm needs to wait for some + trials to complete before it can suggest new trials. + + `BrokenExperiment` + if too many trials failed to run and the experiment cannot continue. + This is determined by ``max_broken`` in the configuration of the experiment. + + `SampleTimeout` + if the algorithm of the experiment could not sample new unique points. + + """ + if self.is_broken: + raise BrokenExperiment("Trials failed too many times") + + if self.is_done: + return None + + try: + trial = orion.core.worker.reserve_trial(self._experiment, self._producer) + + except WaitingForTrials as e: + if self.is_broken: + raise BrokenExperiment("Trials failed too many times") from e + + raise e + + except SampleTimeout as e: + if self.is_broken: + raise BrokenExperiment("Trials failed too many times") from e + + raise e + + if trial is not None: + self._maintain_reservation(trial) + + return trial + + def observe(self, trial, results): + """Observe trial results + + Parameters + ---------- + trial: `orion.core.worker.trial.Trial` + Reserved trial to observe. + results: list + Results to be set for the new trial. Results must have the format + {name: : type: <'objective', 'constraint' or 'gradient'>, value=} otherwise + a ValueError will be raised. If the results are invalid, the trial will not be released. + + Returns + ------- + `orion.core.worker.trial.Trial` + The trial inserted in storage. If `reserve=True` and no results are given, the returned + trial will be in a `reserved` status. + + Raises + ------ + `ValueError` + - If results have invalid format + - If the trial does not exist in storage. + `RuntimeError` + If reservation of the trial has been lost prior to releasing it. + + """ + trial.results += [Trial.Result(**result) for result in results] + try: + self._experiment.update_completed_trial(trial) + self.release(trial, 'completed') + except FailedUpdate as e: + if self.get_trial(trial) is None: + raise ValueError('Trial {} does not exist in database.'.format(trial.id)) from e + + self._release_reservation(trial) + raise RuntimeError('Reservation for trial {} has been lost.'.format(trial.id)) from e + + def workon(self, fct, max_trials=infinity, **kwargs): + """Optimize a given function + + Parameters + ---------- + fct: callable + Function to optimize. Must take arguments provided by trial.params. Additional constant + parameter can be passed as ``**kwargs`` to `workon`. Function must return the final + objective. + max_trials: int, optional + Maximum number of trials to execute within `workon`. If the experiment or algorithm + reach status is_done before, the execution of `workon` terminates. 
+ **kwargs + Constant argument to pass to `fct` in addition to trial.params. If values in kwargs are + present in trial.params, the latter takes precedence. + + Raises + ------ + `ValueError` + If results returned by `fct` have invalid format + + """ + trials = 0 + kwargs = flatten(kwargs) + while not self.is_done and trials < max_trials: + trial = self.suggest() + if trial is None: + log.warning('Algorithm could not sample new points') + return trials + kwargs.update(flatten(trial.params)) + results = fct(**unflatten(kwargs)) + self.observe(trial, results=results) + trials += 1 + + return trials + + def close(self): + """Verify that no reserved trials are remaining and unregister atexit().""" + if self._pacemakers: + raise RuntimeError("There is still reserved trials: {}\nRelease all trials before " + "closing the client, using " + "client.release(trial).".format(self._pacemakers.keys())) + + atexit.unregister(self.set_broken_trials) + + ### + # Private + ### + + def __repr__(self): + """Represent the object as a string.""" + return "Experiment(name=%s, version=%s)" % (self.name, self.version) + + def _verify_reservation(self, trial): + if trial.id not in self._pacemakers: + raise RuntimeError( + 'Trial {} had no pacemakers. Was is reserved properly?'.format(trial.id)) + + if self.get_trial(trial).status != 'reserved': + self._release_reservation(trial) + raise RuntimeError( + 'Reservation for trial {} has been lost.'.format(trial.id)) + + def _maintain_reservation(self, trial): + self._pacemakers[trial.id] = TrialPacemaker(trial) + self._pacemakers[trial.id].start() + + def _release_reservation(self, trial): + if trial.id not in self._pacemakers: + raise RuntimeError( + 'Trial {} had no pacemakers. Was is reserved properly?'.format(trial.id)) + self._pacemakers.pop(trial.id).stop() diff --git a/src/orion/client/manual.py b/src/orion/client/manual.py index e05cd2ef4..d4a93aba5 100644 --- a/src/orion/client/manual.py +++ b/src/orion/client/manual.py @@ -9,14 +9,24 @@ and link them with a particular existing experiment. """ -from orion.core.io.experiment_builder import ExperimentBuilder +import logging + +from orion.client import create_experiment from orion.core.utils import format_trials -def insert_trials(experiment_name, points, cmdconfig=None, raise_exc=True): +log = logging.getLogger(__name__) + + +def insert_trials(experiment_name, points, raise_exc=True): """Insert sets of parameters manually, defined in `points`, as new trials for the experiment name, `experiment_name`. + .. warning:: + + This function is deprecated and will be removed in 0.3.0. + You should use ExperimentClient.insert() instead. + :param experiment_name: Name of the experiment which the new trials are going to be associated with :param points: list of tuples in agreement with experiment's parameter space @@ -31,18 +41,15 @@ def insert_trials(experiment_name, points, cmdconfig=None, raise_exc=True): the database. """ - cmdconfig = cmdconfig if cmdconfig else {} - cmdconfig['name'] = experiment_name - - experiment_view = ExperimentBuilder().build_view_from({'config': cmdconfig}) + log.warning('insert_trials() is deprecated and will be removed in 0.3.0. 
diff --git a/src/orion/core/__init__.py b/src/orion/core/__init__.py
index 24b24624c..4ad5676ca 100644
--- a/src/orion/core/__init__.py
+++ b/src/orion/core/__init__.py
@@ -35,7 +35,7 @@
 __author__ = u'Epistímio'
 __author_short__ = u'Epistímio'
 __author_email__ = 'xavier.bouthillier@umontreal.ca'
-__copyright__ = u'2017-2019, Epistímio'
+__copyright__ = u'2017-2020, Epistímio'
 __url__ = 'https://github.com/epistimio/orion'
 
 DIRS = AppDirs(__name__, __author_short__)
@@ -51,15 +51,36 @@ def define_config():
     """Create and define the fields of the configuration object."""
     config = Configuration()
-    define_database_config(config)
+    define_storage_config(config)
+    define_experiment_config(config)
     define_worker_config(config)
+    define_evc_config(config)
 
     config.add_option(
-        'user_script_config', option_type=str, default='config')
+        'user_script_config', option_type=str, default='config',
+        deprecate=dict(version='v0.3', alternative='worker.user_script_config'))
+
+    config.add_option(
+        'debug', option_type=bool, default=False,
+        help='Turn on Oríon debug mode. Storage will be overridden to an in-memory '
+             'EphemeralDB.')
 
     return config
 
 
+def define_storage_config(config):
+    """Create and define the fields of the storage configuration."""
+    storage_config = Configuration()
+
+    storage_config.add_option(
+        'type', option_type=str, default='legacy', env_var='ORION_STORAGE_TYPE')
+
+    config.storage = storage_config
+
+    define_database_config(config.storage)
+    # Backward compatibility, should be removed in v0.3.0, or not?
+    config.database = config.storage.database
+
+
 def define_database_config(config):
     """Create and define the fields of the database configuration."""
     database_config = Configuration()
@@ -70,31 +91,193 @@ def define_database_config(config):
     default_host = 'localhost'
 
     database_config.add_option(
-        'name', option_type=str, default='orion', env_var='ORION_DB_NAME')
+        'name', option_type=str, default='orion', env_var='ORION_DB_NAME',
+        help='Name of the database.')
 
     database_config.add_option(
-        'type', option_type=str, default='MongoDB', env_var='ORION_DB_TYPE')
+        'type', option_type=str, default='MongoDB', env_var='ORION_DB_TYPE',
+        help=('Type of database. Builtin backends are ``mongodb``, '
+              '``pickleddb`` and ``ephemeraldb``.'))
 
     database_config.add_option(
-        'host', option_type=str, default=default_host, env_var='ORION_DB_ADDRESS')
+        'host', option_type=str, default=default_host, env_var='ORION_DB_ADDRESS',
+        help='URI for ``mongodb``, or file path for ``pickleddb``.')
 
     database_config.add_option(
-        'port', option_type=int, default=27017, env_var='ORION_DB_PORT')
+        'port', option_type=int, default=27017, env_var='ORION_DB_PORT',
+        help='Port address for ``mongodb``.')
 
     config.database = database_config
 
 
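Each `add_option` above wires a default, an optional environment variable, and a help string into the global `orion.core.config` tree. A small sketch of the environment-variable path, assuming the option definitions shown here and that the variables are set before the values are first read:

    import os

    os.environ['ORION_DB_TYPE'] = 'pickleddb'
    os.environ['ORION_DB_ADDRESS'] = '/tmp/orion_db.pkl'

    import orion.core  # the config tree is built at import time

    print(orion.core.config.storage.database.type)  # 'pickleddb'
    # The backward-compatibility alias keeps the old path working:
    print(orion.core.config.database.host)          # '/tmp/orion_db.pkl'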
+def define_experiment_config(config):
+    """Create and define the fields of generic experiment configuration."""
+    experiment_config = Configuration()
+
+    experiment_config.add_option(
+        'max_trials', option_type=int, default=int(10e8), env_var='ORION_EXP_MAX_TRIALS',
+        help="Number of trials to be completed for the experiment. This value "
+             "will be saved within the experiment configuration and reused "
+             "across all workers to determine experiment's completion.")
+
+    experiment_config.add_option(
+        'worker_trials', option_type=int, default=int(10e8),
+        deprecate=dict(version='v0.3', alternative='worker.max_trials',
+                       name='experiment.worker_trials'),
+        help="This argument will be removed in v0.3. Use --worker-max-trials instead.")
+
+    experiment_config.add_option(
+        'max_broken', option_type=int, default=3, env_var='ORION_EXP_MAX_BROKEN',
+        help='Maximum number of broken trials before the experiment stops.')
+
+    experiment_config.add_option(
+        'working_dir', option_type=str, default='', env_var='ORION_WORKING_DIR',
+        help="Set working directory for running experiment.")
+
+    experiment_config.add_option(
+        "pool_size", option_type=int, default=1,
+        deprecate=dict(version='v0.3', alternative=None, name='experiment.pool_size'),
+        help="This argument will be removed in v0.3.")
+
+    experiment_config.add_option(
+        'algorithms', option_type=dict, default={'random': {'seed': None}},
+        help='Algorithm configuration for the experiment.')
+
+    experiment_config.add_option(
+        'strategy', option_type=dict, default={'MaxParallelStrategy': {}},
+        help='Parallel strategy to use with the algorithm.')
+
+    config.experiment = experiment_config
+
+
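These defaults (a practically infinite `max_trials`, random search with no fixed seed, `MaxParallelStrategy`) are what an experiment falls back to when nothing else is configured. A sketch of overriding them per experiment through the Python client, assuming `create_experiment` forwards these fields as the experiment configuration suggests:

    from orion.client import create_experiment

    experiment = create_experiment(
        'demo', space={'x': 'uniform(0, 1)'},
        max_trials=100,                        # instead of int(10e8)
        algorithms={'random': {'seed': 1}},    # instead of an unseeded random search
        strategy={'MaxParallelStrategy': {}})  # the default strategy, made explicit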
 def define_worker_config(config):
     """Create and define the fields of the worker configuration."""
     worker_config = Configuration()
 
     worker_config.add_option(
-        'heartbeat', option_type=int, default=120)
+        'heartbeat', option_type=int, default=120, env_var='ORION_HEARTBEAT',
+        help=('Frequency (seconds) at which the heartbeat of the trial is updated. '
+              'If the heartbeat of a `reserved` trial is older than twice the configured '
+              'heartbeat, Oríon will reset the status of the trial to `interrupted`. '
+              'This allows restoring lost trials (e.g., due to a killed worker).'))
+
     worker_config.add_option(
-        'max_broken', option_type=int, default=3)
+        'max_trials', option_type=int, default=int(10e8), env_var='ORION_WORKER_MAX_TRIALS',
+        help="Number of trials to be completed for this worker. "
+             "If the experiment is completed, the worker will die even if it "
+             "did not reach its maximum number of trials.")
+
     worker_config.add_option(
-        'max_idle_time', option_type=int, default=60)
+        'max_broken', option_type=int, default=3, env_var='ORION_WORKER_MAX_BROKEN',
+        help='Maximum number of broken trials before the worker stops.')
+
+    worker_config.add_option(
+        'max_idle_time', option_type=int, default=60, env_var='ORION_MAX_IDLE_TIME',
+        help=('Maximum time the producer can spend trying to generate a new suggestion. '
+              'Such timeouts are generally caused by a slow database, a large number of '
+              'concurrent workers leading to many race conditions, or small search spaces '
+              'with integer/categorical dimensions that may be fully explored.'))
+
+    worker_config.add_option(
+        'interrupt_signal_code', option_type=int, default=130, env_var='ORION_INTERRUPT_CODE',
+        help='Signal returned by user script to signal to Oríon that it was interrupted.')
+
+    # TODO: Will this support -config as well, or only --config?
+    worker_config.add_option(
+        'user_script_config', option_type=str, default='config',
+        env_var='ORION_USER_SCRIPT_CONFIG',
+        help='Config argument name of user\'s script (--config).')
 
     config.worker = worker_config
 
 
+def define_evc_config(config):
+    """Create and define the fields of the evc configuration."""
+    evc_config = Configuration()
+
+    # TODO: This should be built automatically like get_branching_args_group.
+    #       After this, the cmdline parser should be built based on config.
+
+    evc_config.add_option(
+        'auto_resolution', option_type=bool, default=True,
+        deprecate=dict(version='v0.3', alternative='evc.manual_resolution',
+                       name='evc.auto_resolution'),
+        help="This argument will be removed in v0.3. "
+             "Conflicts are now resolved automatically by default. "
+             "See --manual-resolution to avoid auto-resolution.")
+
+    evc_config.add_option(
+        'manual_resolution', option_type=bool, default=False,
+        env_var='ORION_EVC_MANUAL_RESOLUTION',
+        help=("If ``True``, enter the experiment version control conflict resolver for "
+              "manual resolution on branching events. Otherwise, auto-resolution is "
+              "attempted."))
+
+    evc_config.add_option(
+        'non_monitored_arguments', option_type=list, default=[],
+        env_var='ORION_EVC_NON_MONITORED_ARGUMENTS',
+        help=("Ignore these commandline arguments when looking for differences in "
+              "user's commandline call. "
+              "The environment variable and the commandline support only one argument. "
+              "Use the global config or a local config to pass a list of arguments to "
+              "ignore."))
+
+    evc_config.add_option(
+        'ignore_code_changes', option_type=bool, default=False,
+        env_var='ORION_EVC_IGNORE_CODE_CHANGES',
+        help="If ``True``, ignore code changes when looking for differences.")
+
+    evc_config.add_option(
+        'algorithm_change', option_type=bool, default=False,
+        env_var='ORION_EVC_ALGO_CHANGE',
+        help=("Set algorithm change as resolved if a branching event occurs. "
+              "Child and parent experiments have access to all trials from each other "
+              "when the only difference between them is the algorithm configuration."))
+
+    evc_config.add_option(
+        'code_change_type', option_type=str, default='break',
+        env_var='ORION_EVC_CODE_CHANGE',
+        help=("One of ``break``, ``unsure`` or ``noeffect``. "
+              "Defines how trials should be filtered in the Experiment Version Control tree "
+              "if there is a change in the user's code repository. "
+              "If the effect of the change is ``unsure``, "
+              "the child experiment will access the trials of the parent but not "
+              "the other way around. "
+              "This is to ensure the parent experiment does not get corrupted with possibly "
+              "incompatible results. "
+              "The child cannot access the trials from the parent if ``code_change_type`` "
+              "is ``break``. The parent cannot access trials from the child if "
+              "``code_change_type`` is ``unsure`` or ``break``."))
+
+    evc_config.add_option(
+        'cli_change_type', option_type=str, default='break',
+        env_var='ORION_EVC_CMDLINE_CHANGE',
+        help=("One of ``break``, ``unsure`` or ``noeffect``. "
" + "Defines how trials should be filtered in Experiment Version Control tree " + "if there is a change in the user's commandline call. " + "If the effect of the change is ``unsure``, " + "the child experiment will access the trials of the parent but not " + "the other way around. " + "This is to ensure parent experiment does not get corrupted with possibly " + "incompatible results. " + "The child cannot access the trials from parent if ``cli_change_type`` " + "is ``break``. The parent cannot access trials from child if " + "``cli_change_type`` is ``unsure`` or ``break``.")) + + evc_config.add_option( + 'config_change_type', option_type=str, default='break', + env_var='ORION_EVC_CONFIG_CHANGE', + help=("One of ``break``, ``unsure`` or ``noeffet``. " + "Defines how trials should be filtered in Experiment Version Control tree " + "if there is a change in the user's script. " + "If the effect of the change is ``unsure``, " + "the child experiment will access the trials of the parent but not " + "the other way around. " + "This is to ensure parent experiment does not get corrupted with possibly " + "incompatible results. " + "The child cannot access the trials from parent if ``config_change_type`` " + "is ``break``. The parent cannot access trials from child if " + "``config_change_type`` is ``unsure`` or ``break``.")) + + config.evc = evc_config + + def build_config(): """Define the config and fill it based on global configuration files.""" config = define_config() diff --git a/src/orion/core/cli/__init__.py b/src/orion/core/cli/__init__.py index c93ee7eb9..3e42c98e8 100644 --- a/src/orion/core/cli/__init__.py +++ b/src/orion/core/cli/__init__.py @@ -36,10 +36,10 @@ def main(argv=None): load_modules_parser(orion_parser) - orion_parser.execute(argv) - - return 0 + return orion_parser.execute(argv) if __name__ == "__main__": - main() + returncode = main() + if returncode > 0: + raise SystemExit(returncode) diff --git a/src/orion/core/cli/base.py b/src/orion/core/cli/base.py index 092f243ef..53049a205 100644 --- a/src/orion/core/cli/base.py +++ b/src/orion/core/cli/base.py @@ -10,11 +10,13 @@ """ import argparse import logging +import sys import textwrap import orion from orion.core.io.database import DatabaseError -from orion.core.utils.exceptions import NoConfigurationError +from orion.core.utils.exceptions import ( + BranchingEvent, MissingResultFile, NoConfigurationError, NoNameError) CLI_DOC_HEADER = """ @@ -48,7 +50,7 @@ def __init__(self, description=CLI_DOC_HEADER): '-d', '--debug', action='store_true', help="Use debugging mode with EphemeralDB.") - self.subparsers = self.parser.add_subparsers(help='sub-command help') + self.subparsers = self.parser.add_subparsers(dest='command', help='sub-command help') def get_subparsers(self): """Return the subparser object for this parser.""" @@ -64,7 +66,14 @@ def parse(self, argv): 2: logging.DEBUG} logging.basicConfig(level=levels.get(verbose, logging.DEBUG)) - function = args.pop('func') + if args['command'] is None: + self.parser.parse_args(['--help']) + + function = args.pop('func', None) + empty_command = (argv[-1] if argv else sys.argv[-1]) == args['command'] + if function is None or (empty_command and args.pop('help_empty', False)): + self.parser.parse_args([args['command'], '--help']) + return args, function def execute(self, argv): @@ -72,17 +81,29 @@ def execute(self, argv): try: args, function = self.parse(argv) function(args) - except NoConfigurationError: - print("Error: No commandline configuration found for new experiment.") - 
except DatabaseError as e: - print(e) + except (NoConfigurationError, NoNameError, DatabaseError, MissingResultFile, + BranchingEvent) as e: + print('Error:', e, file=sys.stderr) + + if args.get('verbose', 0) >= 2: + raise e + + return 1 + + except KeyboardInterrupt: + print('Orion is interrupted.') + return 130 + + return 0 -def get_basic_args_group(parser): +def get_basic_args_group( + parser, + group_name="Oríon arguments", + group_help="These arguments determine orion's behaviour"): """Return the basic arguments for any command.""" basic_args_group = parser.add_argument_group( - "Oríon arguments (optional)", - description="These arguments determine orion's behaviour") + group_name, description=group_help) basic_args_group.add_argument( '-n', '--name', diff --git a/src/orion/core/cli/checks/presence.py b/src/orion/core/cli/checks/presence.py index d6d08d0f6..eadbf4f63 100644 --- a/src/orion/core/cli/checks/presence.py +++ b/src/orion/core/cli/checks/presence.py @@ -10,67 +10,49 @@ """ +import orion.core +import orion.core.io.experiment_builder as experiment_builder +import orion.core.utils.backward as backward + class PresenceStage: """The presence stage of the checks.""" - def __init__(self, experiment_builder, cmdargs): - """Create an instance of the stage. - - Parameters - ---------- - experiment_builder: `ExperimentBuilder` - An instance of `ExperimentBuilder` to fetch configs. - - """ - self.builder = experiment_builder + def __init__(self, cmdargs): + """Create an instance of the stage.""" self.cmdargs = cmdargs self.db_config = {} def checks(self): """Return the registered checks.""" yield self.check_default_config - yield self.check_environment_vars yield self.check_configuration_file def check_default_config(self): """Check for a configuration inside the default paths.""" - config = self.builder.fetch_default_options() - - if 'database' not in config: - return "Skipping", "No default configuration found for database." - - self.db_config = config['database'] - print('\n ', self.db_config) + config = orion.core.config.to_dict() - return "Success", "" - - def check_environment_vars(self): - """Check for a configuration inside the environment variables.""" - config = self.builder.fetch_env_vars() - - config = config['database'] - names = ['type', 'name', 'host', 'port'] + backward.update_db_config(config) - if not any(name in config for name in names): - return "Skipping", "No environment variables found." + if 'database' not in config.get('storage', {}): + return "Skipping", "No default configuration found for database." - self.db_config.update(config) + self.db_config = config['storage']['database'] print('\n ', self.db_config) return "Success", "" def check_configuration_file(self): """Check if configuration file has valid database configuration.""" - config = self.builder.fetch_file_config(self.cmdargs) + config = experiment_builder.get_cmd_config(self.cmdargs) if not len(config): return "Skipping", "Missing configuration file." - if 'database' not in config: + if 'database' not in config.get('storage', {}): return "Skipping", "No database found in configuration file." 
- config = config['database'] + config = config['storage']['database'] names = ['type', 'name', 'host', 'port'] if not any(name in config for name in names): diff --git a/src/orion/core/cli/db/test.py b/src/orion/core/cli/db/test.py index b04851b98..ce3ba364a 100644 --- a/src/orion/core/cli/db/test.py +++ b/src/orion/core/cli/db/test.py @@ -15,7 +15,6 @@ from orion.core.cli.checks.creation import CreationStage from orion.core.cli.checks.operations import OperationsStage from orion.core.cli.checks.presence import PresenceStage -from orion.core.io.experiment_builder import ExperimentBuilder from orion.core.utils.exceptions import CheckError log = logging.getLogger(__name__) @@ -36,8 +35,7 @@ def add_subparser(parser): def main(args): """Run through all checks for database.""" - experiment_builder = ExperimentBuilder() - presence_stage = PresenceStage(experiment_builder, args) + presence_stage = PresenceStage(args) creation_stage = CreationStage(presence_stage) operations_stage = OperationsStage(creation_stage) stages = [presence_stage, creation_stage, operations_stage] diff --git a/src/orion/core/cli/db/upgrade.py b/src/orion/core/cli/db/upgrade.py index 3052a002a..c2b34e720 100644 --- a/src/orion/core/cli/db/upgrade.py +++ b/src/orion/core/cli/db/upgrade.py @@ -16,7 +16,7 @@ from orion.core.io.database.ephemeraldb import EphemeralCollection from orion.core.io.database.mongodb import MongoDB from orion.core.io.database.pickleddb import PickledDB -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder import orion.core.utils.backward as backward from orion.storage.base import get_storage from orion.storage.legacy import Legacy @@ -82,11 +82,15 @@ def main(args): if action in ['no', 'n']: sys.exit(0) - experiment_builder = ExperimentBuilder() - local_config = experiment_builder.fetch_full_config(args, use_db=False) - local_config['protocol'] = {'type': 'legacy', 'setup': False} + config = experiment_builder.get_cmd_config(args) + storage_config = config.get('storage') - experiment_builder.setup_storage(local_config) + if storage_config is None: + storage_config = {'type': 'legacy'} + + storage_config['setup'] = False + + experiment_builder.setup_storage(storage_config) storage = get_storage() @@ -115,7 +119,7 @@ def upgrade_documents(storage): """Upgrade scheme of the documents""" for experiment in storage.fetch_experiments({}): add_version(experiment) - add_priors(experiment) + add_space(experiment) storage.update_experiment(uid=experiment.pop('_id'), **experiment) @@ -124,9 +128,9 @@ def add_version(experiment): experiment.setdefault('version', 1) -def add_priors(experiment): - """Add priors to metadata if not present""" - backward.populate_priors(experiment['metadata']) +def add_space(experiment): + """Add space to metadata if not present""" + backward.populate_space(experiment) def update_indexes(database): diff --git a/src/orion/core/cli/evc.py b/src/orion/core/cli/evc.py index 9767d23f9..4db016c24 100644 --- a/src/orion/core/cli/evc.py +++ b/src/orion/core/cli/evc.py @@ -26,6 +26,25 @@ def _add_manual_resolution_argument(parser): help="Manually resolve conflicts") +def _add_non_monitored_arguments_argument(parser): + parser.add_argument( + "--non-monitored-arguments", type=str, nargs='*', + help="Ignore these arguments when looking for differences") + + +def _add_ignore_code_changes_argument(parser): + parser.add_argument( + "--ignore-code-changes", + action="store_true", + help="Ignore code changes when looking for 
differences") + + +def _add_branch_from_argument(parser): + parser.add_argument( + "--branch-from", type=str, + help="Create a new child based on experiment defined by `branch-from`") + + def _add_algorithm_argument(parser, resolution_class): parser.add_argument( resolution_class.ARGUMENT, @@ -54,7 +73,7 @@ def _add_config_argument(parser, resolution_class): help="Set configuration change type") -def _add_branching_argument(parser, resolution_class): +def _add_branch_to_argument(parser, resolution_class): parser.add_argument( '-b', resolution_class.ARGUMENT, metavar='stringID', help='Unique name for the new branching experiment') @@ -63,11 +82,14 @@ def _add_branching_argument(parser, resolution_class): resolution_arguments = { 'auto_resolution': _add_auto_resolution_argument, 'manual_resolution': _add_manual_resolution_argument, + 'non_monitored_arguments': _add_non_monitored_arguments_argument, + 'ignore_code_changes': _add_ignore_code_changes_argument, 'algorithm_change': _add_algorithm_argument, 'code_change_type': _add_code_argument, 'cli_change_type': _add_cli_argument, 'config_change_type': _add_config_argument, - 'branch': _add_branching_argument} + 'branch_from': _add_branch_from_argument, + 'branch_to': _add_branch_to_argument} UNDEFINED_PARSER_ERROR = ( @@ -82,7 +104,10 @@ def get_branching_args_group(parser): description="Arguments to automatically resolved branching events.") _add_manual_resolution_argument(branching_args_group) + _add_non_monitored_arguments_argument(branching_args_group) + _add_ignore_code_changes_argument(branching_args_group) _add_auto_resolution_argument(branching_args_group) + _add_branch_from_argument(branching_args_group) for resolution_class in sorted(Resolution.__subclasses__(), key=lambda cls: cls.__name__): if not resolution_class.ARGUMENT: diff --git a/src/orion/core/cli/hunt.py b/src/orion/core/cli/hunt.py index f0f02b975..837371c8f 100644 --- a/src/orion/core/cli/hunt.py +++ b/src/orion/core/cli/hunt.py @@ -12,11 +12,10 @@ import logging +import orion.core from orion.core.cli import base as cli from orion.core.cli import evc as evc_cli -from orion.core.io import resolve_config -from orion.core.io.evc_builder import EVCBuilder -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder from orion.core.worker import workon log = logging.getLogger(__name__) @@ -26,41 +25,31 @@ def add_subparser(parser): """Add the subparser that needs to be used for this command""" hunt_parser = parser.add_parser('hunt', help='hunt help') - orion_group = cli.get_basic_args_group(hunt_parser) + orion_group = cli.get_basic_args_group( + hunt_parser, group_name='Hunt arguments', group_help='') + + orion.core.config.experiment.add_arguments( + orion_group, + rename=dict(max_broken='--exp-max-broken', max_trials='--exp-max-trials')) orion_group.add_argument( '--max-trials', type=int, metavar='#', - help="number of trials to be completed for the experiment. This value " - "will be saved within the experiment configuration and reused " - "across all workers to determine experiment's completion. " - "(default: %s)" % resolve_config.DEF_CMD_MAX_TRIALS[1]) + help="(DEPRECATED) This argument will be removed in v0.3. Use --exp-max-trials instead") - orion_group.add_argument( - '--worker-trials', type=int, metavar='#', - help="number of trials to be completed for this worker. 
" - "If the experiment is completed, the worker will die even if it " - "did not reach its maximum number of trials " - "(default: %s)" % resolve_config.DEF_CMD_WORKER_TRIALS[1]) + worker_args_group = hunt_parser.add_argument_group( + "Worker arguments (optional)", + description="Arguments to automatically resolved branching events.") - orion_group.add_argument('--working-dir', type=str, - help="Set working directory for running experiment.") - - orion_group.add_argument( - "--pool-size", type=int, metavar='#', - help="number of simultaneous trials the algorithm should suggest. " - "This is useful if many workers are executed in parallel and the algorithm has a " - "strategy to sample non-independant trials simultaneously. Otherwise, it is better " - "to leave `pool_size` to 1 and set a Strategy for Oríon's producer. " - "Note that this option is not usefull useless you " - "know the algorithm have a strategy to produce multiple trials " - "simultaneously. If you have any doubt, leave it to 1. " - "(default: %s)" % resolve_config.DEF_CMD_POOL_SIZE[1]) + orion.core.config.worker.add_arguments( + worker_args_group, + rename=dict(max_broken='--worker-max-broken', max_trials='--worker-max-trials')) evc_cli.get_branching_args_group(hunt_parser) cli.get_user_args_group(hunt_parser) hunt_parser.set_defaults(func=main) + hunt_parser.set_defaults(help_empty=True) # Print help if command is empty return hunt_parser @@ -70,6 +59,10 @@ def main(args): args['root'] = None args['leafs'] = [] # TODO: simplify when parameter parsing is refactored - worker_trials = ExperimentBuilder().fetch_full_config(args)['worker_trials'] - experiment = EVCBuilder().build_from(args) - workon(experiment, worker_trials) + experiment = experiment_builder.build_from_args(args) + config = experiment_builder.get_cmd_config(args) + worker_config = orion.core.config.worker.to_dict() + if config.get('worker'): + worker_config.update(config.get('worker')) + + workon(experiment, **worker_config) diff --git a/src/orion/core/cli/info.py b/src/orion/core/cli/info.py index 299d3343c..8fbc172ce 100755 --- a/src/orion/core/cli/info.py +++ b/src/orion/core/cli/info.py @@ -13,7 +13,8 @@ import sys from orion.core.cli.base import get_basic_args_group -from orion.core.io.evc_builder import EVCBuilder +import orion.core.io.experiment_builder as experiment_builder +from orion.core.utils.format_terminal import format_info log = logging.getLogger(__name__) @@ -28,412 +29,12 @@ def add_subparser(parser): return info_parser -# pylint: disable=protected-access -def hack_until_config_is_refactored(experiment): - """Build the space and the algorithm""" - experiment._experiment._instantiate_config(experiment.configuration) - experiment._experiment._init_done = True - - def main(args): """Fetch config and info experiments""" try: - experiment = EVCBuilder().build_view_from(args) + experiment = experiment_builder.build_view_from_args(args) except ValueError: print('Experiment {} not found in db.'.format(args.get('name', None))) sys.exit(1) - hack_until_config_is_refactored(experiment) - print(format_info(experiment)) - - -INFO_TEMPLATE = """\ -{identification} - -{commandline} - -{configuration} - -{algorithm} - -{space} - -{metadata} - -{refers} - -{stats} -""" - - -def format_info(experiment): - """Render a string for all info of experiment""" - info_string = INFO_TEMPLATE.format( - identification=format_identification(experiment), - commandline=format_commandline(experiment), - configuration=format_config(experiment), - 
algorithm=format_algorithm(experiment), - space=format_space(experiment), - metadata=format_metadata(experiment), - refers=format_refers(experiment), - stats=format_stats(experiment)) - - return info_string - - -TITLE_TEMPLATE = """\ -{title} -{empty:=<{title_len}}\ -""" - - -def format_title(title): - """Render a title above an horizontal bar""" - title_string = TITLE_TEMPLATE.format( - title=title, - title_len=len(title), - empty='') - - return title_string - - -DICT_EMPTY_LEAF_TEMPLATE = "{tab}{key}\n" -DICT_LEAF_TEMPLATE = "{tab}{key}: {value}\n" -DICT_NODE_TEMPLATE = "{tab}{key}:\n{value}\n" - - -def format_dict(dictionary, depth=0, width=4, templates=None): - r"""Render a dict on multiple lines - - Parameters - ---------- - dictionary: dict - The dictionary to render - depth: int - Tab added at the beginning of every lines - width: int - Size of the tab added to each line, multiplied - by the depth of the object in the dict of dicts. - templates: dict - Templates for `empty_leaf`, `leaf` and `dict_node`. - Default is - `empty_leaf="{tab}{key}"` - `leaf="{tab}{key}: {value}\n"` - `dict_node="{tab}{key}:\n{value}\n"` - - Examples - ------- - >>> print(format_dict({1: {2: 3, 3: 4}, 2: {3: 4, 4: {5: 6}}})) - 1: - 2: 3 - 3: 4 - 2: - 3: 4 - 4: - 5: 6 - >>> templates = {'leaf': '{tab}{key}={value}\n', 'dict_node': '{tab}{key}:\n{value}\n'} - >>> print(format_dict({1: {2: 3, 3: 4}, 2: {3: 4, 4: {5: 6}}}, templates=templates)) - 1: - 2=3 - 3=4 - 2: - 3=4 - 4: - 5=6 - - """ - if isinstance(dictionary, (list, tuple)): - return format_list(dictionary, depth, width=width, templates=templates) - - # To avoid using mutable objects as default values in function signature. - if templates is None: - templates = dict() - - empty_leaf_template = templates.get('empty_leaf', DICT_EMPTY_LEAF_TEMPLATE) - leaf_template = templates.get('leaf', DICT_LEAF_TEMPLATE) - node_template = templates.get('dict_node', DICT_NODE_TEMPLATE) - - dict_string = "" - for key in sorted(dictionary.keys()): - tab = (" " * (depth * width)) - value = dictionary[key] - if isinstance(value, (dict, list, tuple)): - if not value: - dict_string += empty_leaf_template.format(tab=tab, key=key) - else: - subdict_string = format_dict( - value, depth + 1, width=width, templates=templates) - dict_string += node_template.format(tab=tab, key=key, value=subdict_string) - else: - dict_string += leaf_template.format(tab=tab, key=key, value=value) - - return dict_string.replace(' \n', '\n').rstrip("\n") - - -LIST_TEMPLATE = """\ -{tab}[ -{items} -{tab}]\ -""" -LIST_ITEM_TEMPLATE = "{tab}{item}\n" -LIST_NODE_TEMPLATE = "{item}\n" - - -def format_list(a_list, depth=0, width=4, templates=None): - r"""Render a list on multiple lines - - Parameters - ---------- - a_list: list - The list to render - depth: int - Tab added at the beginning of every lines - width: int - Size of the tab added to each line, multiplied - by the depth of the object in the list of lists. - templates: dict - Templates for `list`, `item` and `list_node`. 
- Default is - `list="{tab}[\n{items}\n{tab}]"` - `item="{tab}{item}\n"` - `list_node="{item}\n"` - - Examples - ------- - >>> print(format_list([1, [2, 3], 4, [5, 6, 7, 8]])) - [ - 1 - [ - 2 - 3 - ] - 4 - [ - 5 - 6 - 7 - 8 - ] - ] - >>> templates = {} - >>> templates['list'] = '{tab}\n{items}\n{tab}' - >>> templates['item'] = '{tab}- {item}\n' - >>> templates['list_node'] = '{tab}{item}\n' - >>> print(format_list([1, [2, 3], 4, [5, 6, 7, 8]], width=2, templates=templates)) - - 1 - - - 2 - - 3 - - - 4 - - - 5 - - 6 - - 7 - - 8 - - """ - # To avoid using mutable objects as default values in function signature. - if templates is None: - templates = dict() - - list_template = templates.get('list', LIST_TEMPLATE) - item_template = templates.get('item', LIST_ITEM_TEMPLATE) - node_template = templates.get('list_node', LIST_NODE_TEMPLATE) - - tab = (" " * (depth * width)) - list_string = "" - for i, item in enumerate(a_list, 1): - subtab = (" " * ((depth + 1) * width)) - if isinstance(item, (dict, list, tuple)): - item_string = format_dict(item, depth + 1, width=width, templates=templates) - list_string += node_template.format(tab=subtab, id=i, item=item_string) - else: - list_string += item_template.format(tab=subtab, id=i, item=item) - - return list_template.format(tab=tab, items=list_string.rstrip("\n")) - - -ID_TEMPLATE = """\ -{title} -name: {name} -version: {version} -user: {user} -""" - - -def format_identification(experiment): - """Render a string for identification section""" - identification_string = ID_TEMPLATE.format( - title=format_title("Identification"), - name=experiment.name, - version=experiment.version, - user=experiment.metadata['user']) - - return identification_string - - -COMMANDLINE_TEMPLATE = """\ -{title} -{commandline} -""" - - -def format_commandline(experiment): - """Render a string for commandline section""" - commandline_string = COMMANDLINE_TEMPLATE.format( - title=format_title("Commandline"), - commandline=" ".join(experiment.metadata['user_args'])) - - return commandline_string - - -CONFIG_TEMPLATE = """\ -{title} -pool size: {experiment.pool_size} -max trials: {experiment.max_trials} -""" - - -def format_config(experiment): - """Render a string for config section""" - config_string = CONFIG_TEMPLATE.format( - title=format_title("Config"), - experiment=experiment) - - return config_string - - -ALGORITHM_TEMPLATE = """\ -{title} -{configuration} -""" - - -def format_algorithm(experiment): - """Render a string for algorithm section""" - algorithm_string = ALGORITHM_TEMPLATE.format( - title=format_title("Algorithm"), - configuration=format_dict(experiment.configuration['algorithms'])) - - return algorithm_string - - -SPACE_TEMPLATE = """\ -{title} -{params} -""" - - -def format_space(experiment): - """Render a string for space section""" - space_string = SPACE_TEMPLATE.format( - title=format_title("Space"), - params="\n".join(name + ": " + experiment.space[name].get_prior_string() - for name in experiment.space.keys())) - - return space_string - - -METADATA_TEMPLATE = """\ -{title} -user: {experiment.metadata[user]} -datetime: {experiment.metadata[datetime]} -orion version: {experiment.metadata[orion_version]} -VCS: -{vcs} -""" - - -def format_metadata(experiment): - """Render a string for metadata section""" - metadata_string = METADATA_TEMPLATE.format( - title=format_title("Meta-data"), - experiment=experiment, - vcs=format_dict(experiment.metadata.get('VCS', {}), depth=1, width=2)) - - return metadata_string - - -REFERS_TEMPLATE = """\ -{title} -root: {root} 
-parent: {parent} -adapter: {adapter} -""" - - -def format_refers(experiment): - """Render a string for refers section""" - if experiment.node.root is experiment.node: - root = '' - parent = '' - adapter = '' - else: - root = experiment.node.root.name - parent = experiment.node.parent.name - adapter = "\n" + format_dict(experiment.refers['adapter'].configuration, depth=1, width=2) - - refers_string = REFERS_TEMPLATE.format( - title=format_title("Parent experiment"), - root=root, - parent=parent, - adapter=adapter) - - return refers_string - - -STATS_TEMPLATE = """\ -{title} -trials completed: {stats[trials_completed]} -best trial: - id: {stats[best_trials_id]} - evaluation: {stats[best_evaluation]} - params: -{best_params} -start time: {stats[start_time]} -finish time: {stats[finish_time]} -duration: {stats[duration]} -""" - - -NO_STATS_TEMPLATE = """\ -{title} -No trials executed... -""" - - -def format_stats(experiment): - """Render a string for stat section - - Parameters - ---------- - experiment: `orion.core.worker.experiment.Experiment` - templates: dict - templates for the title and `stats`. - See `format_title` for more info. - - """ - stats = experiment.stats - if not stats: - return NO_STATS_TEMPLATE.format( - title=format_title("Stats")) - - best_params = get_trial_params(stats['best_trials_id'], experiment) - - stats_string = STATS_TEMPLATE.format( - title=format_title("Stats"), - stats=stats, - best_params=format_dict(best_params, depth=2, width=2)) - - return stats_string - - -def get_trial_params(trial_id, experiment): - """Get params from trial_id in given experiment""" - best_trial = experiment.get_trial(uid=trial_id) - if not best_trial: - return {} - - return dict((param.name, param.value) for param in best_trial.params) diff --git a/src/orion/core/cli/init_only.py b/src/orion/core/cli/init_only.py index bdd44a6e4..93531eada 100644 --- a/src/orion/core/cli/init_only.py +++ b/src/orion/core/cli/init_only.py @@ -11,9 +11,10 @@ import logging +import orion.core from orion.core.cli import base as cli from orion.core.cli import evc as evc_cli -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder log = logging.getLogger(__name__) @@ -22,13 +23,23 @@ def add_subparser(parser): """Return the parser that needs to be used for this command""" init_only_parser = parser.add_parser('init_only', help='init_only help') - cli.get_basic_args_group(init_only_parser) + orion_group = cli.get_basic_args_group( + init_only_parser, group_name='init_only arguments', group_help='') + + orion.core.config.experiment.add_arguments( + orion_group, + rename=dict(max_broken='--exp-max-broken', max_trials='--exp-max-trials')) + + orion_group.add_argument( + '--max-trials', type=int, metavar='#', + help="(DEPRECATED) This argument will be removed in v0.3. 
Use --exp-max-trials instead") evc_cli.get_branching_args_group(init_only_parser) cli.get_user_args_group(init_only_parser) init_only_parser.set_defaults(func=main) + init_only_parser.set_defaults(help_empty=True) # Print help if command is empty return init_only_parser @@ -36,4 +47,4 @@ def add_subparser(parser): def main(args): """Build and initialize experiment""" # By building the experiment, we create a new experiment document in database - ExperimentBuilder().build_from(args) + experiment_builder.build_from_args(args) diff --git a/src/orion/core/cli/insert.py b/src/orion/core/cli/insert.py index ca98f3bcc..f8d6157b4 100644 --- a/src/orion/core/cli/insert.py +++ b/src/orion/core/cli/insert.py @@ -17,7 +17,7 @@ from orion.core.cli import base as cli from orion.core.io.convert import infer_converter_from_file_type -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder from orion.core.utils.format_trials import tuple_to_trial log = logging.getLogger(__name__) @@ -32,6 +32,7 @@ def add_subparser(parser): cli.get_user_args_group(insert_parser) insert_parser.set_defaults(func=main) + insert_parser.set_defaults(help_empty=True) # Print help if command is empty return insert_parser @@ -39,10 +40,9 @@ def add_subparser(parser): def main(args): """Fetch config and insert new point""" command_line_user_args = args.pop('user_args', [None])[1:] - # TODO: Views are not fully configured until configuration is refactored - experiment = ExperimentBuilder().build_view_from(args) - # TODO: Remove this line when views gets fully configured - experiment = ExperimentBuilder().build_from(args) + experiment_view = experiment_builder.build_view_from_args(args) + experiment = experiment_builder.build(name=experiment_view.name, + version=experiment_view.version) transformed_args = _build_from(command_line_user_args) exp_space = experiment.space diff --git a/src/orion/core/cli/list.py b/src/orion/core/cli/list.py index fb881bc58..7684fe3b8 100644 --- a/src/orion/core/cli/list.py +++ b/src/orion/core/cli/list.py @@ -10,8 +10,7 @@ import logging from orion.core.cli import base as cli -from orion.core.io.evc_builder import EVCBuilder -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder from orion.core.utils.pptree import print_tree from orion.storage.base import get_storage @@ -31,9 +30,8 @@ def add_subparser(parser): def main(args): """List all experiments inside database.""" - builder = ExperimentBuilder() - config = builder.fetch_full_config(args, use_db=False) - builder.setup_storage(config) + config = experiment_builder.get_cmd_config(args) + experiment_builder.setup_storage(config.get('storage')) query = {} @@ -48,7 +46,11 @@ def main(args): root_experiments = [exp for exp in experiments if exp['refers'].get('root_id', exp['_id']) == exp['_id']] + if not root_experiments: + print("No experiment found") + return + for root_experiment in root_experiments: - root = EVCBuilder().build_view_from({'name': root_experiment['name'], - 'version': root_experiment.get('version')}).node + root = experiment_builder.build_view(name=root_experiment['name'], + version=root_experiment.get('version')).node print_tree(root, nameattr='tree_name') diff --git a/src/orion/core/cli/status.py b/src/orion/core/cli/status.py index 085811bd1..ff8c6a04e 100644 --- a/src/orion/core/cli/status.py +++ b/src/orion/core/cli/status.py @@ -15,8 +15,7 @@ import tabulate from orion.core.cli import base 
as cli -from orion.core.io.evc_builder import EVCBuilder -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder from orion.storage.base import get_storage log = logging.getLogger(__name__) @@ -49,9 +48,8 @@ def add_subparser(parser): def main(args): """Fetch config and status experiments""" - builder = ExperimentBuilder() - local_config = builder.fetch_full_config(args, use_db=False) - builder.setup_storage(local_config) + config = experiment_builder.get_cmd_config(args) + experiment_builder.setup_storage(config.get('storage')) args['all_trials'] = args.pop('all', False) @@ -70,7 +68,7 @@ def main(args): raise RuntimeError("Cannot fetch specific version of experiments with --collapse " "or --expand-versions.") - print_evc(filter(lambda e: e.refers.get('parent_id') is None, experiments), **args) + print_evc(experiments, **args) # pylint: disable=unused-argument @@ -85,8 +83,7 @@ def print_evc(experiments, version=None, all_trials=False, collapse=False, """ for exp in experiments: - cfg = {'name': exp.name, 'version': version} - experiment = EVCBuilder().build_view_from(cfg) + experiment = experiment_builder.build_view(exp.name, version) if version is None: expand_experiment = exp else: @@ -107,13 +104,19 @@ def get_experiments(args): Commandline arguments. """ - projection = {'name': 1, 'version': 1} + projection = {'name': 1, 'version': 1, 'refers': 1} query = {'name': args['name']} if args.get('name') else {} experiments = get_storage().fetch_experiments(query, projection) - return [EVCBuilder().build_view_from({'name': exp['name'], 'version': exp.get('version', 1)}) - for exp in experiments] + if args['name']: + root_experiments = experiments + else: + root_experiments = [exp for exp in experiments + if exp['refers'].get('root_id', exp['_id']) == exp['_id']] + + return [experiment_builder.build_view(name=exp['name'], version=exp.get('version', 1)) + for exp in root_experiments] def _has_named_children(exp): diff --git a/src/orion/core/evc/adapters.py b/src/orion/core/evc/adapters.py index 8763100d6..5df644e13 100644 --- a/src/orion/core/evc/adapters.py +++ b/src/orion/core/evc/adapters.py @@ -217,7 +217,7 @@ def apply_if_valid(name, trial, callback=None, raise_if_not=True): Else, output of callback(trial, item). 
""" - for param in trial.params: + for param in trial._params: # pylint: disable=protected-access if param.name == name: return callback is None or callback(trial, param) @@ -282,7 +282,8 @@ def forward(self, trials): (self.param.name, trial)) adapted_trial = copy.deepcopy(trial) - adapted_trial.params.append(copy.deepcopy(self.param)) + # pylint: disable=protected-access + adapted_trial._params.append(copy.deepcopy(self.param)) adapted_trials.append(adapted_trial) return adapted_trials @@ -300,7 +301,8 @@ def remove_dimension(trial, param): """Remove the param and keep the trial if param has default value""" if param == self.param: adapted_trial = copy.deepcopy(trial) - del adapted_trial.params[adapted_trial.params.index(self.param)] + # pylint: disable=protected-access + del adapted_trial._params[adapted_trial._params.index(self.param)] adapted_trials.append(adapted_trial) return True diff --git a/src/orion/core/evc/conflicts.py b/src/orion/core/evc/conflicts.py index 98390cbac..90d03f173 100644 --- a/src/orion/core/evc/conflicts.py +++ b/src/orion/core/evc/conflicts.py @@ -76,7 +76,9 @@ def _build_extended_user_args(config): """ user_args = config['metadata']['user_args'] - parser = OrionCmdlineParser(orion.core.config.user_script_config) + # No need to pass config_prefix because we have access to everything + # required in config[metadata][parser] (parsed data) + parser = OrionCmdlineParser() parser.set_state_dict(config['metadata']['parser']) return user_args + [standard_param_name(key) + value @@ -86,16 +88,17 @@ def _build_extended_user_args(config): def _build_space(config): """Build an optimization space based on given configuration""" space_builder = SpaceBuilder() - space = space_builder.build(config['metadata']['priors']) + space_config = config['space'] + space = space_builder.build(space_config) return space -def detect_conflicts(old_config, new_config): +def detect_conflicts(old_config, new_config, branching=None): """Generate a Conflicts object with all conflicts found in pair (old_config, new_config)""" conflicts = Conflicts() for conflict_class in sorted(Conflict.__subclasses__(), key=lambda cls: cls.__name__): - for conflict in conflict_class.detect(old_config, new_config): + for conflict in conflict_class.detect(old_config, new_config, branching): conflicts.register(conflict) return conflicts @@ -306,8 +309,10 @@ class Conflict(object, metaclass=ABCMeta): """ @classmethod - def detect(cls, old_config, new_config): - """Detect all conflicts in given pair (old_config, new_config) and return a list of them""" + def detect(cls, old_config, new_config, branching_config=None): + """Detect all conflicts in given pair (old_config, new_config) and return a list of them + :param branching_config: + """ pass def __init__(self, old_config, new_config): @@ -323,7 +328,7 @@ def is_resolved(self): return self._is_resolved or self.resolution is not None # pylint:disable=unused-argument,no-self-use - def get_marked_arguments(self, conflicts): + def get_marked_arguments(self, conflicts, **branching_kwargs): """Return arguments from marked resolutions in new configuration Some conflicts may be passed arguments with their marker to automate conflict resolution. 
@@ -527,8 +532,10 @@ class NewDimensionConflict(Conflict): """ @classmethod - def detect(cls, old_config, new_config): - """Detect all new dimensions in `new_config` based on `old_config`""" + def detect(cls, old_config, new_config, branching_config=None): + """Detect all new dimensions in `new_config` based on `old_config` + :param branching_config: + """ old_space = _build_space(old_config) new_space = _build_space(new_config) for name, dim in new_space.items(): @@ -657,8 +664,10 @@ class ChangedDimensionConflict(Conflict): """ @classmethod - def detect(cls, old_config, new_config): - """Detect all changed dimensions in `new_config` based on `old_config`""" + def detect(cls, old_config, new_config, branching_config=None): + """Detect all changed dimensions in `new_config` based on `old_config` + :param branching_config: + """ old_space = _build_space(old_config) new_space = _build_space(new_config) for name, dim in new_space.items(): @@ -741,8 +750,10 @@ class MissingDimensionConflict(Conflict): """ @classmethod - def detect(cls, old_config, new_config): - """Detect all missing dimensions in `new_config` based on `old_config`""" + def detect(cls, old_config, new_config, branching_config=None): + """Detect all missing dimensions in `new_config` based on `old_config` + :param branching_config: + """ for conflict in NewDimensionConflict.detect(new_config, old_config): yield cls(old_config, new_config, conflict.dimension, conflict.prior) @@ -752,7 +763,7 @@ def __init__(self, old_config, new_config, dimension, prior): self.dimension = dimension self.prior = prior - def get_marked_arguments(self, conflicts): + def get_marked_arguments(self, conflicts, **branching_kwargs): """Find and return marked arguments for remove or rename resolution .. seealso:: @@ -1032,8 +1043,10 @@ class AlgorithmConflict(Conflict): """ @classmethod - def detect(cls, old_config, new_config): - """Detect if algorithm definition in `new_config` differs from `old_config`""" + def detect(cls, old_config, new_config, branching_config=None): + """Detect if algorithm definition in `new_config` differs from `old_config` + :param branching_config: + """ if old_config['algorithms'] != new_config['algorithms']: yield cls(old_config, new_config) @@ -1090,15 +1103,19 @@ class CodeConflict(Conflict): """ @classmethod - def detect(cls, old_config, new_config): - """Detect if commit hash in `new_config` differs from `old_config`""" + def detect(cls, old_config, new_config, branching_config=None): + """Detect if commit hash in `new_config` differs from `old_config` + :param branching_config: + """ old_hash_commit = old_config['metadata'].get('VCS', None) new_hash_commit = new_config['metadata'].get('VCS') - if new_hash_commit and old_hash_commit != new_hash_commit: + ignore_code_changes = branching_config is not None and \ + branching_config.get('ignore_code_changes', False) + if not ignore_code_changes and new_hash_commit and old_hash_commit != new_hash_commit: yield cls(old_config, new_config) - def get_marked_arguments(self, conflicts): + def get_marked_arguments(self, conflicts, code_change_type=None, **branching_kwargs): """Find and return marked arguments for code change conflict .. 
seealso:: @@ -1111,7 +1128,10 @@ def get_marked_arguments(self, conflicts): if change_type: return dict(change_type=change_type) - return dict(change_type=adapters.CodeChange.BREAK) + if code_change_type is None: + code_change_type = orion.core.config.evc.code_change_type + + return dict(change_type=code_change_type) def try_resolve(self, change_type=None): """Try to create a resolution CodeResolution @@ -1208,30 +1228,45 @@ class CommandLineConflict(Conflict): """ + # pylint: disable=unused-argument @classmethod - def get_nameless_args(cls, config): + def get_nameless_args(cls, config, user_script_config=None, + non_monitored_arguments=None, **kwargs): """Get user's commandline arguments which are not dimension definitions""" - parser = OrionCmdlineParser(orion.core.config.user_script_config) + # Used python API + if 'parser' not in config['metadata']: + return "" + + if user_script_config is None: + user_script_config = orion.core.config.worker.user_script_config + if non_monitored_arguments is None: + non_monitored_arguments = orion.core.config.evc.non_monitored_arguments + + parser = OrionCmdlineParser(user_script_config) parser.set_state_dict(config['metadata']['parser']) priors = parser.priors_to_normal() nameless_keys = set(parser.parser.arguments.keys()) - set(priors.keys()) nameless_args = {key: arg for key, arg in parser.parser.arguments.items() - if key in nameless_keys} + if key in nameless_keys and key not in non_monitored_arguments} return " ".join(" ".join([key, str(arg)]) for key, arg in sorted(nameless_args.items(), key=lambda a: a[0])) @classmethod - def detect(cls, old_config, new_config): - """Detect if command line call in `new_config` differs from `old_config`""" - old_nameless_args = cls.get_nameless_args(old_config) - new_nameless_args = cls.get_nameless_args(new_config) + def detect(cls, old_config, new_config, branching_config=None): + """Detect if command line call in `new_config` differs from `old_config` + :param branching_config: + """ + if branching_config is None: + branching_config = {} + old_nameless_args = cls.get_nameless_args(old_config, **branching_config) + new_nameless_args = cls.get_nameless_args(new_config, **branching_config) if old_nameless_args != new_nameless_args: yield cls(old_config, new_config) - def get_marked_arguments(self, conflicts): + def get_marked_arguments(self, conflicts, cli_change_type=None, **branching_kwargs): """Find and return marked arguments for cli change conflict .. 
seealso:: @@ -1244,7 +1279,10 @@ def get_marked_arguments(self, conflicts): if change_type: return dict(change_type=change_type) - return dict(change_type=adapters.CommandLineChange.BREAK) + if cli_change_type is None: + cli_change_type = orion.core.config.evc.cli_change_type + + return dict(change_type=cli_change_type) def try_resolve(self, change_type=None): """Try to create a resolution CommandLineResolution @@ -1340,10 +1378,18 @@ class ScriptConfigConflict(Conflict): """ + # pylint:disable=unused-argument @classmethod - def get_nameless_config(cls, config): + def get_nameless_config(cls, config, user_script_config=None, **branching_kwargs): """Get configuration dict of user's script without dimension definitions""" - parser = OrionCmdlineParser(orion.core.config.user_script_config) + # Used python API + if 'parser' not in config['metadata']: + return "" + + if user_script_config is None: + user_script_config = orion.core.config.worker.user_script_config + + parser = OrionCmdlineParser(user_script_config) parser.set_state_dict(config['metadata']['parser']) nameless_config = dict((key, value) @@ -1353,15 +1399,20 @@ def get_nameless_config(cls, config): return nameless_config @classmethod - def detect(cls, old_config, new_config): - """Detect if user's script's config file in `new_config` differs from `old_config`""" - old_script_config = cls.get_nameless_config(old_config) - new_script_config = cls.get_nameless_config(new_config) + def detect(cls, old_config, new_config, branching_config=None): + """Detect if user's script's config file in `new_config` differs from `old_config` + :param branching_config: + """ + if branching_config is None: + branching_config = {} + + old_script_config = cls.get_nameless_config(old_config, **branching_config) + new_script_config = cls.get_nameless_config(new_config, **branching_config) if old_script_config != new_script_config: yield cls(old_config, new_config) - def get_marked_arguments(self, conflicts): + def get_marked_arguments(self, conflicts, config_change_type=None, **branching_kwargs): """Find and return marked arguments for user's script's config change conflict .. seealso:: @@ -1374,7 +1425,10 @@ def get_marked_arguments(self, conflicts): if change_type: return dict(change_type=change_type) - return dict(change_type=adapters.ScriptConfigChange.BREAK) + if config_change_type is None: + config_change_type = orion.core.config.evc.config_change_type + + return dict(change_type=config_change_type) def try_resolve(self, change_type=None): """Try to create a resolution ScriptConfigResolution @@ -1470,14 +1524,15 @@ class ExperimentNameConflict(Conflict): """ @classmethod - def detect(cls, old_config, new_config): + def detect(cls, old_config, new_config, branching_config=None): """Return experiment name conflict no matter what Branching event cannot be triggered experiment name is not the same. + :param branching_config: """ yield cls(old_config, new_config) - def get_marked_arguments(self, conflicts): + def get_marked_arguments(self, conflicts, **branching_kwargs): """Find and return marked arguments for experiment name conflict .. 
seealso::
 
@@ -1493,9 +1548,9 @@
         return {}
 
     @property
-    def username(self):
-        """Retrieve username for configuration"""
-        return self.new_config['metadata']['user']
+    def version(self):
+        """Retrieve version of configuration"""
+        return self.old_config['version']
 
     def try_resolve(self, new_name=None):
         """Try to create a resolution ExperimentNameResolution
@@ -1509,7 +1564,7 @@
         Raises
         ------
         ValueError
-            If name already exists in database for current user.
+            If name already exists in database for current version.
 
         """
         if self.is_resolved:
@@ -1524,8 +1579,8 @@ def diff(self):
 
     def __repr__(self):
         """Reprensentation of the conflict for user interface"""
-        return "Experiment name \'{0}\' already exist for user \'{1}\'".format(
-            self.old_config['name'], self.username)
+        return "Experiment name \'{0}\' already exists with version \'{1}\'".format(
+            self.old_config['name'], self.version)
 
     class ExperimentNameResolution(Resolution):
         """Representation of an experiment name resolution
@@ -1543,7 +1598,7 @@ class ExperimentNameResolution(Resolution):
 
         """
 
-        ARGUMENT = "--branch"
+        ARGUMENT = "--branch-to"
 
         def __init__(self, conflict, new_name):
             """Initialize resolution and mark conflict as resolved
@@ -1559,7 +1614,7 @@ def __init__(self, conflict, new_name):
             Raises
             ------
             ValueError
-                If name already exists in database with a direct child for current user.
+                If name already exists in database with a direct child for current version.
 
             """
             super(ExperimentNameConflict.ExperimentNameResolution, self).__init__(conflict)
@@ -1573,7 +1628,7 @@ def __init__(self, conflict, new_name):
             self.conflict.new_config['version'] = self.new_version
 
         def _validate(self):
-            """Validate new_name is not in database with a direct child for current user"""
+            """Validate new_name is not in database with a direct child for current version"""
             # TODO: WARNING!!! _name_is_unique could lead to race conditions,
             # The resolution may become invalid before the branching experiment is
             # registered. What should we do in such case?
@@ -1590,16 +1645,16 @@
             # the version of the experiment.
             elif self._check_for_greater_versions():
                 raise ValueError(
-                    "Experiment name \'{0}\' already exist for user \'{1}\' and has children. "
+                    "Experiment name \'{0}\' already exists for version \'{1}\' and has children. "
                     "Version cannot be auto-incremented and a new name is required for branching."
-                    .format(self.new_name, self.conflict.username))
+                    .format(self.new_name, self.conflict.version))
             else:
                 self.new_name = self.old_name
                 self.new_version = self.conflict.old_config.get('version', 1) + 1
 
         def _name_is_unique(self):
-            """Return True if given name is not in database for current user"""
-            query = {'name': self.new_name, 'metadata.user': self.conflict.username}
+            """Return True if given name is not in database for current version"""
+            query = {'name': self.new_name, 'version': self.conflict.version}
 
             named_experiments = len(get_storage().fetch_experiments(query))
             return named_experiments == 0
@@ -1632,7 +1687,7 @@ def __repr__(self):
 
         @property
         def is_marked(self):
-            """Return True every time since the `--branch` argument is not used when incrementing
-            version of an experiment.
+            """Return True every time since the `--branch-to` argument is not used when
+            incrementing version of an experiment.
""" return True diff --git a/src/orion/core/evc/experiment.py b/src/orion/core/evc/experiment.py index fb17a198c..06781cf69 100644 --- a/src/orion/core/evc/experiment.py +++ b/src/orion/core/evc/experiment.py @@ -19,9 +19,9 @@ import logging from orion.core.evc.tree import TreeNode -from orion.core.worker.experiment import ExperimentView from orion.storage.base import get_storage + log = logging.getLogger(__name__) @@ -68,8 +68,10 @@ def item(self): not done already. """ if self._item is None: - self._item = ExperimentView(self.name, version=self.version) - self._item.connect_to_version_control_tree(self) + # TODO: Find another way around the circular import + import orion.core.io.experiment_builder as experiment_builder + self._item = experiment_builder.build_view(name=self.name, version=self.version) + self._item._experiment._node = self return self._item @@ -85,7 +87,7 @@ def parent(self): """ if self._parent is None and self._no_parent_lookup: self._no_parent_lookup = False - query = {'_id': self.item.refers['parent_id']} + query = {'_id': self.item.refers.get('parent_id')} selection = {'name': 1, 'version': 1} experiments = get_storage().fetch_experiments(query, selection) diff --git a/src/orion/core/io/cmdline_parser.py b/src/orion/core/io/cmdline_parser.py index f50324a39..8fcbc63d0 100644 --- a/src/orion/core/io/cmdline_parser.py +++ b/src/orion/core/io/cmdline_parser.py @@ -49,6 +49,7 @@ class CmdlineParser(object): def __init__(self): """See `CmdlineParser` description""" # TODO Handle parsing twice. + self.keys = dict() self.arguments = OrderedDict() self._already_parsed = False self.template = [] @@ -57,10 +58,15 @@ def get_state_dict(self): """Give state dict that can be used to reconstruct the parser""" return dict( arguments=list(map(list, self.arguments.items())), + keys=list(map(list, self.keys.items())), template=self.template) def set_state_dict(self, state): """Reset the parser based on previous state""" + if state.get('keys') is None: + # NOTE: To support experiments prior to 0.1.9 + state['keys'] = [(key, self._key_to_arg(key)) for key in state['arguments']] + self.keys = OrderedDict(state['keys']) self.arguments = OrderedDict(state['arguments']) self.template = state['template'] self._already_parsed = bool(self.template) @@ -138,7 +144,6 @@ def parse(self, commandline): value following it until the next named argument. Positional arguments following a named argument are not currently supported. - Aggregation of single characters arguments is not supported yet. 
Ex: `-xzvf` Examples -------- @@ -167,17 +172,18 @@ def parse(self, commandline): if self._already_parsed: raise RuntimeError("The commandline has already been parsed.") - self.arguments = self._parse_arguments(commandline) + keys, arguments = self._parse_arguments(commandline) + self.arguments = arguments + self.keys = keys - for key, value in self.arguments.items(): + for key, value in arguments.items(): # Handle positional arguments if key.startswith("_"): self.template.append("{" + key + "}") # Handle optional ones else: - arg = self._key_to_arg(key) - + arg = self.keys[key] if arg in self.template: continue @@ -210,35 +216,39 @@ def _key_to_arg(arg): def _parse_arguments(self, commandline): arguments = OrderedDict() - argument_name = None + keys = OrderedDict() + key = None for item in commandline: # Handle keyworded arguments if item.startswith("-"): # Make sure we're not defining the same argument twice - argument_name = item.lstrip('-') # If the argument is in the form of `--name=value` - argument_parts = argument_name.split('=') + argument_parts = item.split('=') argument_name = argument_parts[0] + key = argument_name.lstrip('-') - if argument_name in arguments.keys(): + if key in keys: raise ValueError("Conflict: two arguments have the same name: {}" - .format(argument_name)) + .format(key)) - arguments[argument_name] = [] + arguments[key] = [] + keys[key] = argument_name if len(argument_parts) > 1: - arguments[argument_name].append(argument_parts[-1]) + arguments[key].append(argument_parts[-1]) # If the argument did not start with `-` but we have an argument name # That means that this value belongs to that argument name list - elif argument_name is not None and item.strip(" "): - arguments[argument_name].append(item) + elif key is not None and item.strip(" "): + arguments[key].append(item) # No argument name means we have not reached them yet, so we're still in the # Positional arguments part - elif argument_name is None: - arguments["_pos_{}".format(len(arguments))] = item + elif key is None: + _pos_key = "_pos_{}".format(len(arguments)) + keys[_pos_key] = _pos_key + arguments[_pos_key] = item for key, value in arguments.items(): # Loop through the items and check if their value is a list @@ -253,7 +263,7 @@ def _parse_arguments(self, commandline): value = self._parse_paths(value) arguments[key] = value - return arguments + return keys, arguments def _parse_paths(self, value): if isinstance(value, list): diff --git a/src/orion/core/io/config.py b/src/orion/core/io/config.py index 5035e32c0..fb21112c8 100644 --- a/src/orion/core/io/config.py +++ b/src/orion/core/io/config.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# pylint: disable=redefined-builtin """ :mod:`orion.core.io.config` -- Configuration object =================================================== @@ -11,6 +12,7 @@ Highly inspired from https://github.com/mila-iqia/blocks/blob/master/blocks/config.py. 
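To make the new two-dict bookkeeping easier to follow, here is a simplified, self-contained sketch of what _parse_arguments now returns (an illustration only; unlike the real method, value lists are not collapsed or path-converted):

    from collections import OrderedDict

    def parse_arguments(commandline):
        # `keys` maps the curated name (dashes stripped) back to the exact
        # CLI token, so templates can reproduce `--lr` or `-v` verbatim.
        keys, arguments, key = OrderedDict(), OrderedDict(), None
        for item in commandline:
            if item.startswith('-'):
                name = item.split('=')[0]
                key = name.lstrip('-')
                keys[key] = name
                arguments[key] = item.split('=')[1:]
            elif key is not None and item.strip(' '):
                arguments[key].append(item)
            elif key is None:
                pos = '_pos_{}'.format(len(arguments))
                keys[pos] = pos
                arguments[pos] = item
        return keys, arguments

    # parse_arguments(['script.py', '--lr=0.1', '-v', '1'])
    # keys      -> {'_pos_0': '_pos_0', 'lr': '--lr', 'v': '-v'}
    # arguments -> {'_pos_0': 'script.py', 'lr': ['0.1'], 'v': ['1']}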
""" +import contextlib import logging import os @@ -25,10 +27,24 @@ NOT_SET = object() +@contextlib.contextmanager +def _disable_logger(disable=True): + if disable: + logger.disabled = True + yield + + if disable: + logger.disabled = False + + class ConfigurationError(Exception): """Error raised when a configuration value is requested but not set.""" +def _curate(key): + return key.replace('-', '_') + + class Configuration: """Configuration object @@ -56,10 +72,12 @@ class Configuration: """ - SPECIAL_KEYS = ['_config', '_yaml', '_default', '_env_var'] + SPECIAL_KEYS = ['_config', '_subconfigs', '_yaml', '_default', '_env_var', '_help', + '_deprecated'] def __init__(self): self._config = {} + self._subconfigs = {} def load_yaml(self, path): """Load yaml file and set global default configuration @@ -81,9 +99,13 @@ def load_yaml(self, path): return # implies that yaml must be in dict form for key, value in flatten(cfg).items(): - default = self[key] + default = self[key + '._default'] + deprecated = self[key + '._deprecated'] logger.debug('Overwritting "%s" default %s with %s', key, default, value) self[key + '._yaml'] = value + if deprecated and deprecated.get('alternative'): + logger.debug('Overwritting "%s" default %s with %s', key, default, value) + self[deprecated.get('alternative') + '._yaml'] = value def __getattr__(self, key): """Get the value of the option @@ -105,9 +127,12 @@ def __getattr__(self, key): """ if key == 'config': raise AttributeError - if key not in self._config: + + if key not in self._config and key not in self._subconfigs: raise ConfigurationError("Configuration does not have an attribute " "'{}'.".format(key)) + if key in self._subconfigs: + return self._subconfigs[key] config_setting = self._config[key] if 'value' in config_setting: @@ -123,6 +148,9 @@ def __getattr__(self, key): raise ConfigurationError("Configuration not set and no default " "provided: {}.".format(key)) + if config_setting.get('deprecated'): + self._deprecate(key) + return config_setting['type'](value) def __setattr__(self, key, value): @@ -149,17 +177,54 @@ def __setattr__(self, key, value): configuration object. """ + key = _curate(key) if key not in self.SPECIAL_KEYS and key in self._config: self._validate(key, value) self._config[key]['value'] = value + if self._config[key].get('deprecated'): + self._deprecate(key, value) - elif key == '_config' or isinstance(value, Configuration): + elif key in ['_config', '_subconfigs']: super(Configuration, self).__setattr__(key, value) + elif key in self._subconfigs: + raise ValueError('Configuration already contains subconfiguration {}'.format(key)) + + elif isinstance(value, Configuration): + self._subconfigs[key] = value + else: raise TypeError("Can only set {} as a Configuration, not {}. Use add_option to set a " "new option.".format(key, type(value))) + # pylint: disable=unused-argument + def _deprecate(self, key, value=NOT_SET): + deprecate = self._config[key]['deprecated'] + message = "(DEPRECATED) Option `%s` will be removed in %s." + args = [deprecate.get('name', key), deprecate['version']] + if 'alternative' in deprecate: + message += " Use `%s` instead." + args.append(deprecate['alternative']) + + logger.warning(message, *args) + + def get(self, key, deprecated='warn'): + """Access value + + Parameters + ---------- + key: str + Key to access in the configuration. Similar to config.key. + deprecated: str, optional + If 'warn', the access to deprecated options will log a deprecation warning. 
+ If 'ignore', no warning will be logged for access to deprecated options. + + """ + with _disable_logger(disable=(deprecated == 'ignore')): + value = self[key] + + return value + def _validate(self, key, value): """Validate the (key, value) option @@ -195,13 +260,15 @@ def __setitem__(self, key, value): A general object to set an option. """ - keys = key.split(".") + keys = list(map(_curate, key.split("."))) # Set in current config for special keys if len(keys) == 2 and keys[-1] in self.SPECIAL_KEYS: key, field = keys self._validate(key, value) self._config[key][field.lstrip('_')] = value + if self._config[key].get('deprecated'): + self._deprecate(key) # Set in current configuration elif len(keys) == 1: @@ -226,18 +293,27 @@ def __getitem__(self, key): Ex: 'first.second.third' """ - keys = key.split(".") + keys = list(map(_curate, key.split("."))) + # Recursively in sub configurations - if len(keys) > 1: + if len(keys) == 2 and keys[1] in self.SPECIAL_KEYS: + key_config = self._config.get(keys[0], None) + if key_config is None: + raise ConfigurationError("Configuration does not have an attribute " + "'{}'.".format(keys[0])) + return key_config.get(keys[1][1:], None) + elif len(keys) > 1: subconfig = getattr(self, keys[0]) if subconfig is None: - raise KeyError("'{}' is not defined in configuration.".format(keys[0])) + raise ConfigurationError("Configuration does not have an attribute " + "'{}'.".format(key)) return subconfig[".".join(keys[1:])] # Set in current configuration else: return getattr(self, keys[0]) - def add_option(self, key, option_type, default=NOT_SET, env_var=None): + def add_option(self, key, option_type, default=NOT_SET, env_var=None, deprecate=None, + help=None): """Add a configuration setting. Parameters @@ -259,10 +335,87 @@ def add_option(self, key, option_type, default=NOT_SET, env_var=None): The environment variable name that holds this configuration value. If not given, this configuration can only be set in the YAML configuration file. + deprecate: `dict`, optional + Should define dict(version, alternative), version at which the deprecated option will be + removed and alternative to use. A deprecation warning will be logged each time this + option is set by the user. The option `name` can be used in addition to `version` and + `alternative` to provide a different name than the key. This is useful if the key + is in a subconfiguration and we want the deprecation error message to include the full + path. This will add (DEPRECATED) at the beginning of the help message. + help : str, optional + Documentation for the option. Can be reused to build documentation + or to build parsers with help messages. + Default help message is 'Undocumented'. 
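As a usage illustration of the deprecation machinery documented above (the option name and versions below are made up; logging must be configured for the warning to be visible):

    config = Configuration()
    config.add_option('worker_trials', option_type=int, default=10,
                      deprecate=dict(version='v0.3', alternative='worker.max_trials'))

    config.worker_trials                              # logs the deprecation warning
    config.get('worker_trials', deprecated='ignore')  # same value, warning silenced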
""" + key = _curate(key) + if key in self._config or key in self._subconfigs: + raise ValueError('Configuration already contains {}'.format(key)) self._config[key] = {'type': option_type} if env_var is not None: self._config[key]['env_var'] = env_var if default is not NOT_SET: self._config[key]['default'] = default + if deprecate is not None: + if 'version' not in deprecate: + raise ValueError(f'`version` is missing in deprecate option: {deprecate}') + self._config[key]['deprecated'] = deprecate + + if help is None: + help = 'Undocumented' + + if default is not NOT_SET: + help += ' (default: {})'.format(default) + if deprecate is not None: + help = '(DEPRECATED) ' + help + self._config[key]['help'] = help + + def help(self, key): + """Return the help message for the given option.""" + return self[key + '._help'] + + def add_arguments(self, parser, rename=None): + """Add arguments to an `argparse` parser based on configuration + + This does not support subconfigurations. They will be ignored. + + Parameters + ---------- + parser: `argparse.ArgumentParser` + Parser to which this function will add arguments + rename: dict, optional + Mappings to provide different commandline names. Ex `{key: --my-arg}` + + """ + if rename is None: + rename = dict() + + for key in self._config: + # TODO: Try with list and nargs='*', but it may case issues with + # nargs=argparse.REMAINDER. + if self._config[key]['type'] in (dict, list, tuple): + continue + + # NOTE: Do not set default, if parser.parse_argv().options[key] is None, then code + # should look to config[key]. + arg_name = rename.get(key, "--{}".format(key.replace('_', '-'))) + parser.add_argument( + arg_name, type=self._config[key]['type'], + help=self._config[key].get('help')) + + def __contains__(self, key): + """Return True if the option is defined.""" + return key in self._config or key in self._subconfigs + + def to_dict(self): + """Return a dictionary representation of the configuration""" + config = dict() + + with _disable_logger(): + for key in self._config: + config[key] = self[key] + + for key in self._subconfigs: + config[key] = self[key].to_dict() + + return config diff --git a/src/orion/core/io/database/__init__.py b/src/orion/core/io/database/__init__.py index 040d112d3..08209338a 100644 --- a/src/orion/core/io/database/__init__.py +++ b/src/orion/core/io/database/__init__.py @@ -305,6 +305,12 @@ class DuplicateKeyError(DatabaseError): pass +class DatabaseTimeout(DatabaseError): + """Exception type used when there is a timeout during database operations.""" + + pass + + class OutdatedDatabaseError(DatabaseError): """Exception type used when the database is outdated.""" diff --git a/src/orion/core/io/database/ephemeraldb.py b/src/orion/core/io/database/ephemeraldb.py index 6d0dcd48f..87d59ce26 100644 --- a/src/orion/core/io/database/ephemeraldb.py +++ b/src/orion/core/io/database/ephemeraldb.py @@ -301,11 +301,16 @@ def delete_many(self, query=None): retained_documents = [] for document in self._documents: if not document.match(query): - retained_documents.append(document) + retained_documents.append(document.to_dict()) else: deleted += 1 - self._documents = retained_documents + # Reset indexes + for name, (keys, _) in self._indexes.items(): + self._indexes[name] = (keys, set()) + + self._documents = [] + self.insert_many(retained_documents) return deleted diff --git a/src/orion/core/io/database/mongodb.py b/src/orion/core/io/database/mongodb.py index abc373f8a..ab6d8be30 100644 --- a/src/orion/core/io/database/mongodb.py +++ 
b/src/orion/core/io/database/mongodb.py @@ -13,7 +13,7 @@ import pymongo from orion.core.io.database import ( - AbstractDB, DatabaseError, DuplicateKeyError) + AbstractDB, DatabaseError, DatabaseTimeout, DuplicateKeyError) AUTH_FAILED_MESSAGES = [ @@ -42,6 +42,20 @@ def _decorator(self, *args, **kwargs): try: rval = method(self, *args, **kwargs) + except pymongo.errors.ExecutionTimeout as e: + # Raised when a database operation times out, exceeding the $maxTimeMS set in + # the query or command option. + raise DatabaseTimeout() from e + except pymongo.errors.NetworkTimeout as e: + # An operation on an open connection exceeded socketTimeoutMS. + # + # The remaining connections in the pool stay open. In the case of a + # write operation, you cannot know whether it succeeded or failed. + raise DatabaseTimeout() from e + except pymongo.errors.WTimeoutError as e: + # Raised when a database operation times out (i.e. wtimeout expires) + # before replication completes. + raise DatabaseTimeout() from e except pymongo.errors.DuplicateKeyError as e: raise DuplicateKeyError(str(e)) from e except pymongo.errors.BulkWriteError as e: @@ -253,7 +267,7 @@ def count(self, collection_name, query=None): """ dbcollection = self._db[collection_name] - if hasattr(dbcollection, 'count_documents'): + if not isinstance(getattr(dbcollection, 'count_documents'), pymongo.collection.Collection): return dbcollection.count_documents(filter=query if query else {}) return dbcollection.count(filter=query) diff --git a/src/orion/core/io/database/pickleddb.py b/src/orion/core/io/database/pickleddb.py index 984162054..d89522fe1 100644 --- a/src/orion/core/io/database/pickleddb.py +++ b/src/orion/core/io/database/pickleddb.py @@ -15,16 +15,38 @@ import pickle from pickle import PicklingError -from filelock import FileLock +from filelock import FileLock, Timeout import orion.core -from orion.core.io.database import AbstractDB +from orion.core.io.database import AbstractDB, DatabaseTimeout from orion.core.io.database.ephemeraldb import EphemeralDB log = logging.getLogger(__name__) DEFAULT_HOST = os.path.join(orion.core.DIRS.user_data_dir, 'orion', 'orion_db.pkl') +TIMEOUT_ERROR_MESSAGE = """\ +Could not acquire lock for PickledDB after {} seconds. + +This is likely due to one or more of the following scenarios: + +1. There is a large number of workers and many simultaneous queries. This typically occurs + when the task to optimize is short (few minutes). Try to reduce the number of workers + at least below 50. + +2. The database is growing large with thousands of trials and many experiments. + If so, you can use a different PickleDB (different file, that is, different `host`) + for each experiment separately to alleviate this issue. + +3. The filesystem is slow. Parallel filesystems on HPC often suffer from + a large pool of users generating frequent I/O. In this case try using a separate + partition that may be less affected. + +If you cannot solve the issues listed above that are causing timeouts, you +may need to set up the MongoDB backend for better performance. +See https://orion.readthedocs.io/en/stable/install/database.html +""" + def find_unpickable_doc(dict_of_dict): """Look for a dictionary that cannot be pickled.""" @@ -69,13 +91,18 @@ class PickledDB(AbstractDB): host: str File path to save pickled ephemeraldb. Default is {user data dir}/orion/orion_db.pkl ex: $HOME/.local/share/orion/orion_db.pkl + timeout: int + Maximum number of seconds to wait for the lock before raising DatabaseTimeout. + Default is 60. 
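A hedged usage sketch of the new timeout plumbing (the host path and document are illustrative):

    from orion.core.io.database import DatabaseTimeout
    from orion.core.io.database.pickleddb import PickledDB

    db = PickledDB(host='/tmp/orion_db.pkl', timeout=5)
    try:
        db.write('experiments', {'name': 'demo'})
    except DatabaseTimeout as exc:
        # The lock was not acquired within 5 seconds; the exception carries
        # TIMEOUT_ERROR_MESSAGE above with the troubleshooting hints.
        print(exc)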
""" # pylint: disable=unused-argument - def __init__(self, host=DEFAULT_HOST, *args, **kwargs): + def __init__(self, host=DEFAULT_HOST, timeout=60, *args, **kwargs): super(PickledDB, self).__init__(host) + self.timeout = timeout + if os.path.dirname(host): os.makedirs(os.path.dirname(host), exist_ok=True) @@ -198,10 +225,13 @@ def locked_database(self, write=True): """Lock database file during wrapped operation call.""" lock = FileLock(self.host + '.lock') - with lock.acquire(timeout=60): - database = self._get_database() + try: + with lock.acquire(timeout=self.timeout): + database = self._get_database() - yield database + yield database - if write: - self._dump_database(database) + if write: + self._dump_database(database) + except Timeout as e: + raise DatabaseTimeout(TIMEOUT_ERROR_MESSAGE.format(self.timeout)) from e diff --git a/src/orion/core/io/evc_builder.py b/src/orion/core/io/evc_builder.py deleted file mode 100644 index 71d753236..000000000 --- a/src/orion/core/io/evc_builder.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint:disable=protected-access -""" -:mod:`orion.core.io.evc_builder` -- Builder of experiment version control tree -============================================================================== - -.. module:: experiment - :platform: Unix - :synopsis: Builder of the experiment version control tree - -The EVCBuilder takes care of building a main experiment along with an EVC tree and connect them -together. - -A user can define a root and some leafs that should be the extremums of the tree. Those can be -different than the actual root and leafs of the global EVC tree, making the trimmed version a small -subset of the global version. - -""" -from orion.core.evc.experiment import ExperimentNode -from orion.core.io.experiment_builder import ExperimentBuilder - - -class EVCBuilder(object): - """Builder of experiment version control trees using - :class:`orion.core.evc.experiment.ExperimentNode` - - .. seealso:: - - `orion.core.io.experiment_builder` for more information on the process of building - experiments. 
- - :class:`orion.core.evc.experiment` - :class:`orion.core.worker.experiment` - """ - - # pylint:disable=no-self-use - def connect_to_version_control_tree(self, experiment): - """Build the EVC and connect the experiment to it""" - experiment_node = ExperimentNode(experiment.name, experiment.version, experiment=experiment) - experiment.connect_to_version_control_tree(experiment_node) - - def build_view_from(self, cmdargs): - """Build an experiment view based on global config and connect it to the EVC""" - experiment_view = ExperimentBuilder().build_view_from(cmdargs) - self.connect_to_version_control_tree(experiment_view) - - return experiment_view - - def build_from(self, cmdargs): - """Build an experiment based on config and connect it to the EVC""" - experiment = ExperimentBuilder().build_from(cmdargs) - self.connect_to_version_control_tree(experiment) - - return experiment - - def build_from_config(self, config): - """Build an experiment based on given config and connect it to the EVC""" - experiment = ExperimentBuilder().build_from_config(config) - self.connect_to_version_control_tree(experiment) - - return experiment diff --git a/src/orion/core/io/experiment_branch_builder.py b/src/orion/core/io/experiment_branch_builder.py index 2cb1ecc53..af04d9fe9 100644 --- a/src/orion/core/io/experiment_branch_builder.py +++ b/src/orion/core/io/experiment_branch_builder.py @@ -22,6 +22,7 @@ import logging from orion.algo.space import Dimension +import orion.core from orion.core.evc import conflicts from orion.core.evc.adapters import CompositeAdapter @@ -31,22 +32,46 @@ # pylint: disable=too-many-public-methods class ExperimentBranchBuilder: - """Build a new configuration for the experiment based on parent config.""" + """Build a new configuration for the experiment based on parent config. + + Parameters + ---------- + conflicts: Conflicts + Object representing a group of conflicts + manual_resolution: bool, optional + Starts the prompt to resolve the conflicts manually. Uses system's default if not provided. + branch_from: str, optional + Name of the experiment to branch from. + algorithm_change: bool, optional + Whether to automatically solve the algorithm conflict (change of algo config). + Defaults to True. + code_change_type: str, optional + How to resolve code change automatically. Must be one of 'noeffect', 'unsure' or + 'break'. Defaults to 'break'. + cli_change_type: str, optional + How to resolve cli change automatically. Must be one of 'noeffect', 'unsure' or 'break'. + Defaults to 'break'. + config_change_type: str, optional + How to resolve config change automatically. Must be one of 'noeffect', 'unsure' or + 'break'. Defaults to 'break'. + + """ + + def __init__(self, conflicts, manual_resolution=None, **branching_arguments): + # TODO: handle all other arguments + if manual_resolution is None: + manual_resolution = orion.core.config.evc.manual_resolution + + self.manual_resolution = manual_resolution + self.conflicts = conflicts - def __init__(self, conflicts, branching_configuration): - """ - Initialize the ExperimentBranchBuilder by populating a list of the conflicts inside - the two configurations. 
- """ - self.auto_resolution = branching_configuration.pop('auto_resolution', None) + for key, value in branching_arguments.items(): + if value is None and key in orion.core.config.evc: + branching_arguments[key] = orion.core.config.evc[key] - if self.auto_resolution is not None: - log.info("Auto-resolution is deprecated and will be removed in v0.2.0.") - self.auto_resolution = None + self.branching_arguments = branching_arguments - self.manual_resolution = branching_configuration.pop('manual_resolution', False) - self.conflicts = conflicts - self.conflicting_config.update(branching_configuration) + self.conflicting_config.update(branching_arguments) self.resolve_conflicts() @property @@ -70,7 +95,7 @@ def resolve_conflicts(self, silence_errors=True): resolution = self.conflicts.try_resolve( conflict, silence_errors=silence_errors, - **conflict.get_marked_arguments(self.conflicts)) + **conflict.get_marked_arguments(self.conflicts, **self.branching_arguments)) if resolution and (self.manual_resolution and not resolution.is_marked): self.conflicts.revert(resolution) diff --git a/src/orion/core/io/experiment_builder.py b/src/orion/core/io/experiment_builder.py index 3194505a7..30e37f8bb 100644 --- a/src/orion/core/io/experiment_builder.py +++ b/src/orion/core/io/experiment_builder.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# pylint:disable=protected-access """ :mod:`orion.core.io.experiment_builder` -- Create experiment from user options ============================================================================== @@ -87,222 +86,498 @@ """ import copy +import datetime +import getpass import logging +import sys +from orion.algo.space import Space import orion.core +from orion.core.evc.adapters import Adapter +from orion.core.evc.conflicts import detect_conflicts, ExperimentNameConflict from orion.core.io import resolve_config from orion.core.io.database import DuplicateKeyError -from orion.core.io.orion_cmdline_parser import OrionCmdlineParser -from orion.core.utils.exceptions import NoConfigurationError, RaceCondition +from orion.core.io.experiment_branch_builder import ExperimentBranchBuilder +from orion.core.io.interactive_commands.branching_prompt import BranchingPrompt +from orion.core.io.space_builder import SpaceBuilder +import orion.core.utils.backward as backward +from orion.core.utils.exceptions import ( + BranchingEvent, NoConfigurationError, NoNameError, RaceCondition) from orion.core.worker.experiment import Experiment, ExperimentView -from orion.storage.base import Storage +from orion.core.worker.primary_algo import PrimaryAlgo +from orion.core.worker.strategy import Strategy +from orion.storage.base import get_storage, setup_storage log = logging.getLogger(__name__) -# pylint: disable=too-many-public-methods -class ExperimentBuilder(object): - """Builder for :class:`orion.core.worker.experiment.Experiment` - and :class:`orion.core.worker.experiment.ExperimentView` +## +# Functions to build experiments +## + +def build(name, version=None, branching=None, **config): + """Build an experiment object + + If new, `space` argument must be provided, else all arguments are fetched from the database + based on (name, version). If any argument given does not match the corresponding ones in the + database for given (name, version), than the version is incremented and the experiment will be a + child of the previous version. + + Parameters + ---------- + name: str + Name of the experiment to build + version: int, optional + Version to select. If None, last version will be selected. 
If version given is larger than + largest version available, the largest version will be selected. + branch_from: str, optional + Name of the experiment to branch from. The new experiment will have access to all trials + from the parent experiment it has been branched from. + space: dict, optional + Optimization space of the algorithm. Should have the form `dict(name='(args)')`. + algorithms: str or dict, optional + Algorithm used for optimization. + strategy: str or dict, optional + Parallel strategy to use to parallelize the algorithm. + max_trials: int, optional + Maximum number of trials before the experiment is considered done. + storage: dict, optional + Configuration of the storage backend. + branching: dict, optional + Arguments to control the branching. + + branch_from: str, optional + Name of the experiment to branch from. + manual_resolution: bool, optional + Starts the prompt to resolve the conflicts manually. Defaults to False. + non_monitored_arguments: list of str, optional + Will ignore these arguments while looking for differences. Defaults to []. + ignore_code_changes: bool, optional + Will ignore code changes while looking for differences. Defaults to False. + algorithm_change: bool, optional + Whether to automatically solve the algorithm conflict (change of algo config). + Defaults to True. + code_change_type: str, optional + How to resolve code change automatically. Must be one of 'noeffect', 'unsure' or + 'break'. Defaults to 'break'. + cli_change_type: str, optional + How to resolve cli change automatically. Must be one of 'noeffect', 'unsure' or 'break'. + Defaults to 'break'. + config_change_type: str, optional + How to resolve config change automatically. Must be one of 'noeffect', 'unsure' or + 'break'. Defaults to 'break'. - .. seealso:: + """ + config = copy.deepcopy(config) + for key, value in list(config.items()): + if key.startswith('_') or value is None: + config.pop(key) - `orion.core.io.experiment_builder` for more information on the process of building - experiments. 
+ if 'strategy' in config: + config['producer'] = {'strategy': config.pop('strategy')} - :class:`orion.core.worker.experiment.Experiment` - :class:`orion.core.worker.experiment.ExperimentView` - """ + if branching is None: + branching = {} + + if branching.get('branch_from'): + branching.setdefault('branch_to', name) + name = branching['branch_from'] + + db_config = fetch_config_from_db(name, version) + + new_config = config + config = resolve_config.merge_configs(db_config, config) - # pylint:disable=no-self-use - def fetch_default_options(self): - """Get dictionary of default options""" - return resolve_config.fetch_default_options() + metadata = resolve_config.fetch_metadata(config.get('user'), config.get('user_args')) - # pylint:disable=no-self-use - def fetch_env_vars(self): - """Get dictionary of environment variables specific to Oríon""" - return resolve_config.fetch_env_vars() + config = resolve_config.merge_configs(db_config, config, {'metadata': metadata}) - def fetch_file_config(self, cmdargs): - """Get dictionary of options from configuration file provided in command-line""" - return resolve_config.fetch_config(cmdargs) + # TODO: Find a better solution + if isinstance(config.get('algorithms'), dict) and len(config['algorithms']) > 1: + config['algorithms'] = new_config['algorithms'] - def fetch_config_from_db(self, cmdargs): - """Get dictionary of options from experiment found in the database + config.setdefault('name', name) + config.setdefault('version', version) - Notes - ----- - This method builds an experiment view in the background to fetch the configuration from - the database. + if 'space' not in config: + raise NoConfigurationError( + 'Experiment {} does not exist in DB and space was not defined.'.format(name)) - """ + if len(config['space']) == 0: + raise NoConfigurationError("No prior found. Please include at least one.") + + experiment = create_experiment(**copy.deepcopy(config)) + if experiment.id is None: try: - experiment_view = self.build_view_from(cmdargs) - except ValueError as e: - if "No experiment with given name" in str(e): - return {} - raise + _register_experiment(experiment) + except DuplicateKeyError: + experiment = build(branching=branching, **config) - return experiment_view.configuration + return experiment - def fetch_metadata(self, cmdargs): - """Infer rest information about the process + versioning""" - return resolve_config.fetch_metadata(cmdargs) + conflicts = _get_conflicts(experiment, branching) + must_branch = len(conflicts.get()) > 1 or branching.get('branch_to') + if must_branch: + branched_experiment = _branch_experiment(experiment, conflicts, version, branching) + try: + _register_experiment(branched_experiment) + except DuplicateKeyError as e: + raise RaceCondition('There was a race condition during branching.') from e - def fetch_full_config(self, cmdargs, use_db=True): - """Get dictionary of the full configuration of the experiment. + return branched_experiment - .. seealso:: + _update_experiment(experiment) + return experiment - `orion.core.io.experiment_builder` for more information on the hierarchy of - configurations. - Parameters - ---------- - cmdargs: +def build_view(name, version=None): + """Build experiment view - use_db: bool - Use experiment configuration found in database if True. Defaults to True. + An experiment view provides all reading operations of a standard experiment but prevents the + modification of the experiment and its trials. 
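To make the new functional builder API concrete, a rough usage sketch (assumes a storage backend is already set up; the name and priors are made up):

    import orion.core.io.experiment_builder as experiment_builder

    # Writable experiment: registered, updated or branched as described above.
    experiment = experiment_builder.build(
        name='demo',
        space={'lr': 'loguniform(1e-5, 1.0)'},
        algorithms='random',
        max_trials=100)

    # Read-only counterpart.
    view = experiment_builder.build_view('demo')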
- Notes - ----- - This method builds an experiment view in the background to fetch the configuration from - the database. + Parameters + ---------- + name: str + Name of the experiment to build + version: int, optional + Version to select. If None, last version will be selected. If version given is larger than + largest version available, the largest version will be selected. - """ - default_options = self.fetch_default_options() - env_vars = self.fetch_env_vars() - if use_db: - config_from_db = self.fetch_config_from_db(cmdargs) - else: - config_from_db = {} - cmdconfig = self.fetch_file_config(cmdargs) - metadata = dict(metadata=self.fetch_metadata(cmdargs)) + """ + db_config = fetch_config_from_db(name, version) - exp_config = resolve_config.merge_configs( - default_options, env_vars, copy.deepcopy(config_from_db), cmdconfig, cmdargs, metadata) + if not db_config: + message = ("No experiment with given name '%s' and version '%s' inside database, " + "no view can be created." % (name, version if version else '*')) + raise ValueError(message) - if 'user' in exp_config: - exp_config['metadata']['user'] = exp_config['user'] + db_config.setdefault('version', 1) - # TODO: Find a better solution - if isinstance(exp_config['algorithms'], dict) and len(exp_config['algorithms']) > 1: - for key in list(config_from_db['algorithms'].keys()): - exp_config['algorithms'].pop(key) + experiment = create_experiment(**db_config) - return exp_config + return ExperimentView(experiment) - def build_view_from(self, cmdargs): - """Build an experiment view based on full configuration. - .. seealso:: +def create_experiment(name, version, space, **kwargs): + """Instantiate the experiment and its attribute objects - `orion.core.io.experiment_builder` for more information on the hierarchy of - configurations. + All unspecified arguments will be replaced by system's defaults (orion.core.config.*). - :class:`orion.core.worker.experiment.ExperimentView` for more information on the - experiment view object. - """ - local_config = self.fetch_full_config(cmdargs, use_db=False) + Parameters + ---------- + name: str + Name of the experiment. + version: int + Version of the experiment. + space: dict or Space object + Optimization space of the algorithm. If dict, should have the form + `dict(name='(args)')`. + algorithms: str or dict, optional + Algorithm used for optimization. + strategy: str or dict, optional + Parallel strategy to use to parallelize the algorithm. + max_trials: int, optional + Maximum number or trials before the experiment is considered done. + storage: dict, optional + Configuration of the storage backend. 
- self.setup_storage(local_config) + """ + experiment = Experiment(name=name, version=version) + experiment._id = kwargs.get('_id', None) # pylint:disable=protected-access + experiment.pool_size = kwargs.get('pool_size') + if experiment.pool_size is None: + experiment.pool_size = orion.core.config.experiment.get( + 'pool_size', deprecated='ignore') + experiment.max_trials = kwargs.get('max_trials', orion.core.config.experiment.max_trials) + experiment.space = _instantiate_space(space) + experiment.algorithms = _instantiate_algo(experiment.space, kwargs.get('algorithms')) + experiment.producer = kwargs.get('producer', {}) + experiment.producer['strategy'] = _instantiate_strategy(experiment.producer.get('strategy')) + experiment.working_dir = kwargs.get('working_dir', orion.core.config.experiment.working_dir) + experiment.metadata = kwargs.get('metadata', {'user': kwargs.get('user', getpass.getuser())}) + experiment.refers = kwargs.get('refers', {'parent_id': None, 'root_id': None, 'adapter': []}) + experiment.refers['adapter'] = _instantiate_adapters(experiment.refers.get('adapter', [])) + + return experiment + + +def fetch_config_from_db(name, version=None): + """Fetch configuration from database + + Parameters + ---------- + name: str + Name of the experiment to fetch + version: int, optional + Version to select. If None, last version will be selected. If version given is larger than + largest version available, the largest version will be selected. - # Information should be enough to infer experiment's name. - exp_name = local_config['name'] - if exp_name is None: - raise RuntimeError("Could not infer experiment's name. " - "Please use either `name` cmd line arg or provide " - "one in orion's configuration file.") + """ + configs = get_storage().fetch_experiments({'name': name}) - name = local_config['name'] - user = local_config.get('user', None) - version = local_config.get('version', None) - return ExperimentView(name, user=user, version=version) + if not configs: + return {} - def build_from(self, cmdargs, handle_racecondition=True): - """Build a fully configured (and writable) experiment based on full configuration. + config = _fetch_config_version(configs, version) - .. seealso:: + if len(configs) > 1: + log.info("Many versions for experiment %s have been found. Using latest " + "version %s.", name, config['version']) - `orion.core.io.experiment_builder` for more information on the hierarchy of - configurations. + backward.populate_space(config) - :class:`orion.core.worker.experiment.Experiment` for more information on the experiment - object. - """ - full_config = self.fetch_full_config(cmdargs) + return config - log.info(full_config) - try: - experiment = self.build_from_config(full_config) - except (DuplicateKeyError, RaceCondition): - # Fails if concurrent experiment with identical (name, version) - # is written first in the database. - # Next build_from(cmdargs) should either load experiment from database - # and run smoothly if identical or trigger an experiment fork. - # In other words, there should not be more than 1 level of recursion. - if handle_racecondition: - experiment = self.build_from(cmdargs, handle_racecondition=False) - - raise +## +# Private helper functions to build experiments +## - return experiment +def _instantiate_adapters(config): + """Instantiate the adapter object - def build_from_config(self, config): - """Build a fully configured (and writable) experiment based on full configuration. 
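For contrast with build(), a hedged sketch of calling create_experiment directly; it only instantiates objects in memory and writes nothing to the database (values are illustrative):

    experiment = create_experiment(
        name='demo', version=1,
        space={'lr': 'loguniform(1e-5, 1.0)'},
        algorithms='random',
        max_trials=100)

    assert experiment.id is None  # only _register_experiment() persists it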
+ Parameters + ---------- + config: list + List of adapter configurations to build a CompositeAdapter for the EVC. - .. seealso:: + """ + return Adapter.build(config) - `orion.core.io.experiment_builder` for more information on the hierarchy of - configurations. - :class:`orion.core.worker.experiment.Experiment` for more information on the experiment - object. - """ - log.info(config) +def _instantiate_space(config): + """Instantiate the space object - # Pop out configuration concerning databases and resources - config.pop('database', None) - config.pop('resources', None) + Build the Space object if argument is a dictionary, else return the Space object as is. - experiment = Experiment(config['name'], config.get('user', None), - config.get('version', None)) + Parameters + ---------- + config: dict or Space object + Dictionary of priors or already built Space object. - # TODO: Handle both from cmdline and python APIs. - if 'priors' not in config['metadata'] and 'user_args' not in config['metadata']: - raise NoConfigurationError + """ + if isinstance(config, Space): + return config - # Parse to generate priors - if 'user_args' in config['metadata']: - parser = OrionCmdlineParser(orion.core.config.user_script_config) - parser.parse(config['metadata']['user_args']) - config['metadata']['parser'] = parser.get_state_dict() - config['metadata']['priors'] = dict(parser.priors) + return SpaceBuilder().build(config) - # Finish experiment's configuration and write it to database. - experiment.configure(config) - return experiment +def _instantiate_algo(space, config): + """Instantiate the algorithm object - def setup_storage(self, config): - """Create the storage instance from a configuration. + Parameters + ---------- + config: dict, optional + Configuration of the algorithm. If None or empty, system's defaults are used + (orion.core.config.experiment.algorithms). - Parameters - ---------- - config: dict - Configuration for the database. + """ + if not config: + config = orion.core.config.experiment.algorithms - """ + return PrimaryAlgo(space, config) - # TODO: Fix this in config refactoring - storage_opts = config.get('protocol', {'type': 'legacy'}) - storage_type = storage_opts.pop('type') + - log.debug("Creating %s storage client with args: %s", storage_type, storage_opts) +def _instantiate_strategy(config=None): + """Instantiate the strategy object - try: - Storage(of_type=storage_type, config=config, **storage_opts) - except ValueError: - if Storage().__class__.__name__.lower() != storage_type.lower(): - raise + Parameters + ---------- + config: dict, optional + Configuration of the strategy. If None or empty, system's defaults are used + (orion.core.config.producer.strategy). + + """ + if not config: + config = orion.core.config.experiment.strategy + + if isinstance(config, str): + strategy_type = config + config = {} + else: + strategy_type, config = next(iter(config.items())) + + return Strategy(of_type=strategy_type, **config) + + +def _register_experiment(experiment): + """Register a new experiment in the database""" + experiment.metadata['datetime'] = datetime.datetime.utcnow() + config = experiment.configuration + # This will raise DuplicateKeyError if a concurrent experiment with + # identical (name, metadata.user) is written first in the database. 
+ + get_storage().create_experiment(config) + + # XXX: Reminder for future DB implementations: + # MongoDB, updates an inserted dict with _id, so should you :P + experiment._id = config['_id'] # pylint:disable=protected-access + + # Update refers in db if experiment is root + if experiment.refers.get('parent_id') is None: + log.debug('update refers (name: %s)', experiment.name) + experiment.refers['root_id'] = experiment.id + get_storage().update_experiment(experiment, refers=experiment.configuration['refers']) + + +def _update_experiment(experiment): + """Update experiment configuration in database""" + log.debug('updating experiment (name: %s)', experiment.name) + config = experiment.configuration + + # TODO: Remove since this should not occur anymore without metadata.user in the indices? + # Writing the final config to an already existing experiment raises + # a DuplicateKeyError because of the embedding id `metadata.user`. + # To avoid this `final_config["name"]` is popped out before + # `db.write()`, thus seemingly breaking the compound index + # `(name, metadata.user)` + config.pop("name") + + get_storage().update_experiment(experiment, **config) + + +def _branch_experiment(experiment, conflicts, version, branching_arguments): + """Create a new branch experiment with adapters for the given conflicts""" + experiment_brancher = ExperimentBranchBuilder(conflicts, **branching_arguments) + + needs_manual_resolution = (not experiment_brancher.is_resolved or + experiment_brancher.manual_resolution) + + if not experiment_brancher.is_resolved: + name_conflict = conflicts.get([ExperimentNameConflict])[0] + if not name_conflict.is_resolved and not version: + raise RaceCondition('There was likely a race condition during version increment.') + + if needs_manual_resolution: + # TODO: This should only be possible when using cmdline API + branching_prompt = BranchingPrompt(experiment_brancher) + + if not sys.__stdin__.isatty(): + raise BranchingEvent(branching_prompt.get_status()) + + branching_prompt.cmdloop() + + if branching_prompt.abort or not experiment_brancher.is_resolved: + sys.exit() + + config = experiment_brancher.conflicting_config + config['refers']['adapter'] = experiment_brancher.create_adapters().configuration + config['refers']['parent_id'] = experiment.id + + config.pop('_id') + + return create_experiment(**config) + + +def _get_conflicts(experiment, branching): + """Get conflicts between current experiment and corresponding configuration in database""" + db_experiment = build_view(experiment.name, experiment.version) + conflicts = detect_conflicts(db_experiment.configuration, experiment.configuration, + branching) + + # elif must_branch and not enable_branching: + # raise ValueError("Configuration is different and generate a branching event") + + return conflicts + + +def _fetch_config_version(configs, version=None): + """Fetch the experiment configuration corresponding to the given version + + Parameters + ---------- + configs: list + List of configurations fetched from storage. + version: int, optional + Version to select. If None, last version will be selected. If version given is larger than + largest version available, the largest version will be selected. + + """ + max_version = max(configs, key=lambda exp: exp.get('version', 1)).get('version', 1) + + if version is None: + version = max_version + + if version > max_version: + log.warning("Version %s was specified but most recent version is only %s. 
" + "Using %s.", version, max_version, max_version) + + version = min(version, max_version) + + configs = filter(lambda exp: exp.get('version', 1) == version, configs) + + return next(iter(configs)) + + +### +# Functions for commandline API +### + +def build_from_args(cmdargs): + """Build an experiment based on commandline arguments. + + Options provided in commandline and configuration file (--config) will overwrite system's + default values and configuration from database if experiment already exits. + Commandline arguments have precedence over configuration file options. + + .. seealso:: + + :func:`orion.core.io.experiment_builder.build` for more information on experiment creation. + + """ + cmd_config = get_cmd_config(cmdargs) + + if 'name' not in cmd_config: + raise NoNameError() + + setup_storage(cmd_config['storage'], debug=cmd_config.get('debug')) + + return build(**cmd_config) + + +def build_view_from_args(cmdargs): + """Build an experiment view based on commandline arguments + + .. seealso:: + + :func:`orion.core.io.experiment_builder.build_view` for more information on experiment view + creation. + + """ + cmd_config = get_cmd_config(cmdargs) + + if 'name' not in cmd_config: + raise NoNameError() + + setup_storage(cmd_config['storage'], debug=cmd_config.get('debug')) + + name = cmd_config.get('name') + version = cmd_config.get('version') + + return build_view(name, version) + + +def get_cmd_config(cmdargs): + """Fetch configuration defined by commandline and local configuration file. + + Arguments of commandline have priority over options in configuration file. + """ + cmdargs = resolve_config.fetch_config_from_cmdargs(cmdargs) + cmd_config = resolve_config.fetch_config(cmdargs) + cmd_config = resolve_config.merge_configs(cmd_config, cmdargs) + + cmd_config.update(cmd_config.pop('experiment', {})) + cmd_config['branching'] = cmd_config.pop('evc', {}) + + metadata = resolve_config.fetch_metadata(cmd_config.get('user'), cmd_config.get('user_args')) + cmd_config['metadata'] = metadata + cmd_config.pop('config', None) + + backward.populate_space(cmd_config) + backward.update_db_config(cmd_config) + + return cmd_config diff --git a/src/orion/core/io/orion_cmdline_parser.py b/src/orion/core/io/orion_cmdline_parser.py index 9ffa43367..ffc4639e7 100644 --- a/src/orion/core/io/orion_cmdline_parser.py +++ b/src/orion/core/io/orion_cmdline_parser.py @@ -18,7 +18,10 @@ from collections import defaultdict, OrderedDict import copy +import errno +import os import re +import shutil from orion.core.io.cmdline_parser import CmdlineParser from orion.core.io.convert import infer_converter_from_file_type @@ -51,6 +54,8 @@ class OrionCmdlineParser(): An OrderedDict obtained by parsing the config file, if one was found. priors : OrderedDict An OrderedDict obtained from merging `cmd_priors` and `file_priors`. + user_script : str + File path of the script executed (inferred from parsed commandline) config_prefix : str Prefix for the configuration file used by the parser to identify it. 
file_config_path : str @@ -67,7 +72,7 @@ """ - def __init__(self, config_prefix='config'): + def __init__(self, config_prefix='config', allow_non_existing_user_script=False): """Create an `OrionCmdlineParser`.""" self.parser = CmdlineParser() self.cmd_priors = OrderedDict() @@ -78,6 +83,9 @@ def __init__(self, config_prefix='config'): self.file_config_path = None self.converter = None + self.allow_non_existing_user_script = allow_non_existing_user_script + self.user_script = None + # Extraction methods for the file parsing part. self._extraction_method = {dict: self._extract_dict, defaultdict: self._extract_defaultdict, @@ -126,6 +134,7 @@ def parse(self, commandline): If a prior inside the commandline and the config file have the same name. """ + self.infer_user_script(commandline) replaced = self._replace_priors(commandline) configuration = self.parser.parse(replaced) self._build_priors(configuration) @@ -135,6 +144,24 @@ def parse(self, commandline): raise ValueError("Conflict: definition of same prior in commandline and config: " "{}".format(duplicated_priors)) + def infer_user_script(self, user_args): + """Infer the script name and perform some checks""" + if not user_args: + return + + # TODO: Parse commandline for any options to python and pick the script filepath properly + if user_args[0] == 'python': + user_script = user_args[1] + else: + user_script = user_args[0] + + if (not os.path.exists(user_script) and not shutil.which(user_script) and + not self.allow_non_existing_user_script): + raise OSError(errno.ENOENT, "The path specified for the script does not exist", + user_script) + + self.user_script = user_script + @property def priors(self): """Return an OrderedDict obtained from merging `cmd_priors` and `file_priors`.""" @@ -408,16 +435,16 @@ def _create_config_file(self, config_path, trial): # Create a copy of the template instance = copy.deepcopy(self.config_file_data) - for param in trial.params: + for name, value in trial.params.items(): # The param will only correspond to config keys # that require a prior, so we make sure to skip # the ones that do not. - if param.name not in self.file_priors.keys(): + if name not in self.file_priors.keys(): continue # Since namespaces start with '/', we must skip # the first element of the list. - path = param.name.split('/')[1:] + path = name.split('/')[1:] current_depth = instance for key in path: @@ -436,7 +463,7 @@ def _create_config_file(self, config_path, trial): break if isinstance(current_depth[key], str): - current_depth[key] = param.value + current_depth[key] = value else: current_depth = current_depth[key] @@ -445,9 +472,9 @@ def _build_configuration(self, trial): configuration = copy.deepcopy(self.parser.arguments) - for param in trial.params: - name = param.name.lstrip('/') - configuration[name] = param.value + for name, value in trial.params.items(): + name = name.lstrip('/') + configuration[name] = value return configuration diff --git a/src/orion/core/io/resolve_config.py b/src/orion/core/io/resolve_config.py index 253f5129b..f0471deea 100644 --- a/src/orion/core/io/resolve_config.py +++ b/src/orion/core/io/resolve_config.py @@ -31,7 +31,7 @@ .. note:: `Optimization` entries are required, `Dynamic` entry is optional. 
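The _create_config_file and _build_configuration hunks above adapt to trial.params becoming a plain dict; a small self-contained illustration of the new shape (parameter names are made up):

    # trial.params now maps '/namespace' -> value instead of holding Param objects.
    params = {'/lr': 0.1, '/model/layers': 3}
    for name, value in params.items():
        path = name.split('/')[1:]  # drop the empty element before the leading '/'
        print(path, value)          # ['lr'] 0.1 then ['model', 'layers'] 3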
""" -import errno +import copy import getpass import hashlib import logging @@ -44,6 +44,8 @@ import orion import orion.core from orion.core import config +from orion.core.io.orion_cmdline_parser import OrionCmdlineParser +from orion.core.utils.flatten import unflatten def is_exe(path): @@ -80,16 +82,183 @@ def is_exe(path): ) +def _convert_dashes(config, ref): + """Convert dash in keys to underscores based on a reference dict. + + The reference is used to avoid converting keys in dictionary that are values + of options. + """ + config = copy.deepcopy(config) + for key in list(config.keys()): + converted_key = key.replace('-', '_') + if converted_key in ref: + config[converted_key] = config.pop(key) + + if all(isinstance(item[converted_key], dict) for item in [config, ref]): + config[converted_key] = _convert_dashes(config[converted_key], ref[converted_key]) + + return config + + +# NOTE: Silencing this pylint error for now, but seriously this function is quite horrible. +# We'll need to clean this up at some point... +# pylint:disable=too-many-branches +def fetch_config_from_cmdargs(cmdargs): + """Turn flat cmdargs into nested dicts like orion.core.config.""" + config_file = cmdargs.pop('config', None) + tmp_cmdargs = copy.deepcopy(cmdargs) + tmp_cmdargs['config'] = config_file + cmdargs['config'] = config_file + cmdargs = tmp_cmdargs + + cmdargs_config = {} + + if cmdargs.get('max_trials') is not None: + log.warning( + '--max-trials is deprecated and will be removed in v0.3. ' + 'Use --exp-max-trials instead') + cmdargs_config['experiment.max_trials'] = cmdargs.pop('max_trials') + + if cmdargs.get('worker_trials') is not None: + log.warning( + '--worker-trials is deprecated and will be removed in v0.3. ' + 'Use --worker-max-trials instead') + cmdargs_config['worker.max_trials'] = cmdargs.pop('worker_trials') + + mappings = dict( + experiment=dict( + exp_max_broken='max_broken', + exp_max_trials='max_trials'), + worker=dict( + worker_max_broken='max_broken', + worker_max_trials='max_trials')) + + mappings = dict( + experiment=dict( + max_broken='exp_max_broken', + max_trials='exp_max_trials'), + worker=dict( + max_broken='worker_max_broken', + max_trials='worker_max_trials')) + + global_config = config.to_dict() + + for key in ['config', 'user_args']: + if cmdargs.get(key) not in [False, None]: + cmdargs_config[key] = cmdargs[key] + + for key in ['name', 'user', 'version']: + if cmdargs.get(key) not in [False, None]: + cmdargs_config[f'experiment.{key}'] = cmdargs[key] + + for key in ['branch_from', 'branch_to']: + if cmdargs.get(key) not in [False, None]: + cmdargs_config[f'evc.{key}'] = cmdargs[key] + + # Apply config at the root + for key in ['debug']: + + # Adapt to cli arguments + cli_key = mappings.get(key, key) + + value = cmdargs.pop(cli_key, None) + if value is not None: + cmdargs_config[f'{key}'] = value + + # Apply to subconfigs + for key in ['experiment', 'worker', 'evc']: + for subkey in global_config[key].keys(): + + # Adapt to cli arguments + cli_key = mappings.get(key, {}).get(subkey, subkey) + + value = cmdargs.pop(cli_key, None) + if value is not None: + cmdargs_config[f'{key}.{subkey}'] = value + + return unflatten(cmdargs_config) + + def fetch_config(args): """Return the config inside the .yaml file if present.""" orion_file = args.get('config') - config = dict() + local_config = {} if orion_file: log.debug("Found orion configuration file at: %s", os.path.abspath(orion_file.name)) orion_file.seek(0) - config = yaml.safe_load(orion_file) - - return config + tmp_config = 
yaml.safe_load(orion_file) + + global_config = config.to_dict() + + tmp_config = _convert_dashes(tmp_config, global_config) + + # Fix deprecations first because some names are shared by experiment and worker + max_trials = tmp_config.pop('max_trials', None) + if max_trials is not None: + log.warning( + '(DEPRECATED) Option `max_trials` is deprecated ' + 'and will be removed in v0.3. Use instead the option' + '\nexperiment:\n max_trials: %s', max_trials) + local_config['experiment.max_trials'] = max_trials + + worker_trials = tmp_config.get('experiment', {}).pop('worker_trials', None) + if worker_trials is not None: + log.warning( + '(DEPRECATED) Option `experiment.worker_trials` is deprecated ' + 'and will be removed in v0.3. Use instead the option' + '\nworker:\n max_trials: %s', worker_trials) + local_config['worker.max_trials'] = worker_trials + + worker_trials = tmp_config.pop('worker_trials', None) + if worker_trials is not None: + log.warning( + '(DEPRECATED) Option `worker_trials` is deprecated ' + 'and will be removed in v0.3. Use instead the option' + '\nworker:\n max_trials: %s', worker_trials) + local_config['worker.max_trials'] = worker_trials + + producer = tmp_config.pop('producer', None) + if producer is not None: + log.warning( + '(DEPRECATED) Option `producer` is deprecated ' + 'and will be removed in v0.3. Use instead the option' + '\nexperiment:\n strategy: %s', producer['strategy']) + local_config['experiment.strategy'] = producer['strategy'] + + producer = tmp_config.get('experiment', {}).pop('producer', None) + if producer is not None: + log.warning( + '(DEPRECATED) Option `experiment.producer` is deprecated ' + 'and will be removed in v0.3. Use instead the option' + '\nexperiment:\n strategy: %s', producer['strategy']) + local_config['experiment.strategy'] = producer['strategy'] + + local_config = unflatten(local_config) + + # For backward compatibility + for key in ['storage', 'experiment', 'worker', 'evc']: + subkeys = list(global_config[key].keys()) + + # Arguments that are only supported locally + if key == 'experiment': + subkeys += ['name', 'version', 'user'] + elif key == 'evc': + subkeys += ['branch_from', 'branch_to'] + + for subkey in subkeys: + # Backward compatibility + backward_value = tmp_config.pop(subkey, None) + if backward_value is not None: + log.warning( + '(DEPRECATED) Option `%s` is deprecated and will be removed in v0.3. 
' + 'Use the following option instead:' '\n%s:\n %s:\n %s', + subkey, key, subkey, yaml.dump(backward_value, indent=6)) + value = tmp_config.get(key, {}).pop(subkey, backward_value) + if value is not None: + local_config.setdefault(key, {}) + local_config[key][subkey] = value + + return local_config def fetch_default_options(): @@ -158,42 +327,36 @@ def fetch_env_vars(): return env_vars -def fetch_metadata(cmdargs): +def fetch_metadata(user=None, user_args=None): """Infer rest information about the process + versioning""" - metadata = {} + metadata = {'user': user if user else getpass.getuser()} metadata['orion_version'] = orion.core.__version__ - # Move 'user_script' and 'user_args' to 'metadata' key - user_args = cmdargs.get('user_args', []) + if user_args is None: + user_args = [] # Trailing white space is caught by argparse as an empty argument if len(user_args) == 1 and user_args[0] == '': user_args = [] - user_script = user_args[0] if user_args else None - - if user_script: - abs_user_script = os.path.abspath(user_script) - if is_exe(abs_user_script): - user_script = abs_user_script - - if user_script and not os.path.exists(user_script): - raise OSError(errno.ENOENT, "The path specified for the script does not exist", user_script) + cmdline_parser = OrionCmdlineParser(config.worker.user_script_config) + cmdline_parser.parse(user_args) - if user_script: - metadata['user_script'] = user_script - metadata['VCS'] = infer_versioning_metadata(metadata['user_script']) + if cmdline_parser.user_script: + # TODO: Remove this, it is all in cmdline_parser now + metadata['user_script'] = cmdline_parser.user_script + metadata['VCS'] = infer_versioning_metadata(cmdline_parser.user_script) if user_args: - metadata['user_args'] = user_args[1:] + # TODO: Remove this, it is all in cmdline_parser now + metadata['user_args'] = user_args - metadata['user'] = getpass.getuser() return metadata def merge_configs(*configs): - """Merge configuration dictionnaries following the given hierarchy + """Merge configuration dictionaries following the given hierarchy Suppose function is called as merge_configs(A, B, C). Then any pair (key, value) in C would overwrite any previous value from A or B. The same applies for B over A. diff --git a/src/orion/core/io/space_builder.py b/src/orion/core/io/space_builder.py index 978bdbc10..c49729814 100644 --- a/src/orion/core/io/space_builder.py +++ b/src/orion/core/io/space_builder.py @@ -44,8 +44,8 @@ from scipy.stats import distributions as sp_dists from orion.algo.space import (Categorical, Fidelity, Integer, Real, Space) -from orion.core import config as orion_config -from orion.core.io.orion_cmdline_parser import OrionCmdlineParser +from orion.core.utils.flatten import flatten + log = logging.getLogger(__name__) @@ -117,7 +117,7 @@ class DimensionBuilder(object): Real(name=learning_rate, prior={reciprocal: (0.001, 1), {}}, shape=(10,)) >>> dimbuilder.build('something_else', 'poisson(mu=3)') Integer(name=something_else, prior={poisson: (), {'mu': 3}}, shape=()) - >>> dim = dimbuilder.build('other2', 'random(-5, 2)') + >>> dim = dimbuilder.build('other2', 'uniform(-5, 2)') >>> dim Real(name=other2, prior={uniform: (-5, 7), {}}, shape=()) >>> dim.interval() @@ -256,23 +256,6 @@ def __init__(self): self.converter = None self.parser = None - def build_from(self, config): - """Build a `Space` object from a configuration. - - Initialize a new parser for this commandline and parse the given config then - build a `Space` object from that configuration. 
- - Returns - ------- - `orion.algo.space.Space` - The problem's search space definition. - - """ - self.parser = OrionCmdlineParser(orion_config.user_script_config) - self.parser.parse(config) - - return self.build(self.parser.priors) - def build(self, configuration): """Create a definition of the problem's search space. @@ -292,7 +275,7 @@ def build(self, configuration): """ self.space = Space() - for namespace, expression in configuration.items(): + for namespace, expression in flatten(configuration).items(): if _should_not_be_built(expression): continue diff --git a/src/orion/core/utils/__init__.py b/src/orion/core/utils/__init__.py index a3d76f277..c0c90441c 100644 --- a/src/orion/core/utils/__init__.py +++ b/src/orion/core/utils/__init__.py @@ -34,7 +34,7 @@ class SingletonAlreadyInstantiatedError(ValueError): def __init__(self, name): """Pass the same constant message to ValueError underneath.""" - super().__init__('No singleton instance of (type: {}) was created' + super().__init__("A singleton instance of (type: {}) has already been instantiated." .format(name)) @@ -45,7 +45,7 @@ class SingletonNotInstantiatedError(TypeError): def __init__(self, name): """Pass the same constant message to TypeError underneath.""" - super().__init__("A singleton instance of (type: {}) has already been instantiated." + super().__init__('No singleton instance of (type: {}) was created' .format(name)) diff --git a/src/orion/core/utils/backward.py b/src/orion/core/utils/backward.py index 78593c830..6b515de21 100644 --- a/src/orion/core/utils/backward.py +++ b/src/orion/core/utils/backward.py @@ -14,17 +14,34 @@ from orion.core.io.orion_cmdline_parser import OrionCmdlineParser +def update_user_args(metadata): + """Make sure user script is not removed from metadata""" + if "user_script" in metadata and metadata["user_script"] not in metadata["user_args"]: + metadata["user_args"] = [metadata["user_script"]] + metadata["user_args"] + + def populate_priors(metadata): """Compute parser state and priors based on user_args and populate metadata.""" if 'user_args' not in metadata: return - parser = OrionCmdlineParser(orion.core.config.user_script_config) + update_user_args(metadata) + + parser = OrionCmdlineParser(orion.core.config.worker.user_script_config, + allow_non_existing_user_script=True) parser.parse(metadata["user_args"]) metadata["parser"] = parser.get_state_dict() metadata["priors"] = dict(parser.priors) +def populate_space(config): + """Add the space definition at the root of config.""" + populate_priors(config['metadata']) + # Overwrite space to make sure to include changes from user_args + if 'priors' in config['metadata']: + config['space'] = config['metadata']['priors'] + + def db_is_outdated(database): """Return True if the database scheme is outdated.""" deprecated_indices = [('name', 'metadata.user'), ('name', 'metadata.user', 'version'), @@ -32,3 +49,11 @@ def db_is_outdated(database): index_information = database.index_information('experiments') return any(index in deprecated_indices for index in index_information.keys()) + + +def update_db_config(config): + """Merge DB config back into storage config""" + config.setdefault('storage', orion.core.config.storage.to_dict()) + if 'database' in config: + config['storage'] = {'type': 'legacy'} + config['storage']['database'] = config.pop('database') diff --git a/src/orion/core/utils/exceptions.py b/src/orion/core/utils/exceptions.py index 31328dedb..c7f855c0f 100644 --- a/src/orion/core/utils/exceptions.py +++ 
b/src/orion/core/utils/exceptions.py @@ -9,10 +9,26 @@ """ +NO_CONFIGURATION_FOUND = """\ +No commandline configuration found for new experiment.""" + + +NO_EXP_NAME_PROVIDED = """\ +No name provided for the experiment.""" + + class NoConfigurationError(Exception): """Raise when commandline configuration is empty.""" - pass + def __init__(self, message=NO_CONFIGURATION_FOUND): + super().__init__(message) + + +class NoNameError(Exception): + """Raise when no name is provided for an experiment.""" + + def __init__(self, message=NO_EXP_NAME_PROVIDED): + super().__init__(message) class CheckError(Exception): @@ -25,3 +41,77 @@ class RaceCondition(Exception): """Raise when a race condition occurred.""" pass + + +MISSING_RESULT_FILE = """ +Cannot parse result file. + +Make sure to report results in file `$ORION_RESULTS_PATH`. +This can be done with `orion.client.cli.report_objective()`. +""" + + +class MissingResultFile(Exception): + """Raise when the result file is missing or empty at the end of trial execution.""" + + def __init__(self, message=MISSING_RESULT_FILE): + super().__init__(message) + + +BRANCHING_ERROR_MESSAGE = """\ +Configuration is different and generates a branching event: +{} + +Hint +---- + +This error is typically caused by the following 2 reasons: + 1) Commandline calls where arguments are different from one worker to another + (think of paths that are worker specific). There will be --cli-change-type + in the error message above if that is the case. + 2) User script that writes to the repository of the script, causing changes in the code + and therefore leading to branching events. There will be --code-change-type + in the error message above if that is the case. + +For each case you should: + 1) Use --non-monitored-arguments [ARGUMENT_NAME] + (where your argument would be --argument-name, note the lack of dashes at + the beginning and the underscores instead of dashes between words) + The commandline argument only supports one entry. To ignore many arguments, + you can use the option in a local config file, or in the global config file: + ``` + evc: + non_monitored_arguments: ['FIRST_ARG', 'ANOTHER_ARG'] + ``` + + 2) Avoid writing data in your repository. It should only be code anyway, right? :) + Otherwise, you can ignore code changes altogether with option --ignore-code-changes. + +""" + + +class BranchingEvent(Exception): + """Raise when conflicts could not be automatically resolved.""" + + def __init__(self, status, message=BRANCHING_ERROR_MESSAGE): + super().__init__(message.format(status)) + + +class SampleTimeout(Exception): + """Raised when the algorithm is not able to sample new unique points in time""" + + pass + + +class WaitingForTrials(Exception): + """Raised when the algorithm needs to wait for some trials to complete before it can suggest new + ones + """ + + pass + + +class BrokenExperiment(Exception): + """Raised when too many trials failed in an experiment and it is now considered broken""" + + pass diff --git a/src/orion/core/utils/format_terminal.py b/src/orion/core/utils/format_terminal.py new file mode 100644 index 000000000..79f1fc1e5 --- /dev/null +++ b/src/orion/core/utils/format_terminal.py @@ -0,0 +1,404 @@ +# -*- coding: utf-8 -*- +""" +:mod:`orion.core.utils.format_terminal` -- Utility functions for formatting prints to terminal +============================================================================================== + +.. 
module:: format_terminal + :platform: Unix + :synopsis: Functions to build strings for terminal prints + +""" + + +INFO_TEMPLATE = """\ +{identification} + +{commandline} + +{configuration} + +{algorithm} + +{space} + +{metadata} + +{refers} + +{stats} +""" + + +def format_info(experiment): + """Render a string for all info of experiment""" + info_string = INFO_TEMPLATE.format( + identification=format_identification(experiment), + commandline=format_commandline(experiment), + configuration=format_config(experiment), + algorithm=format_algorithm(experiment), + space=format_space(experiment), + metadata=format_metadata(experiment), + refers=format_refers(experiment), + stats=format_stats(experiment)) + + return info_string + + +TITLE_TEMPLATE = """\ +{title} +{empty:=<{title_len}}\ +""" + + +def format_title(title): + """Render a title above a horizontal bar""" + title_string = TITLE_TEMPLATE.format( + title=title, + title_len=len(title), + empty='') + + return title_string + + +DICT_EMPTY_LEAF_TEMPLATE = "{tab}{key}\n" +DICT_LEAF_TEMPLATE = "{tab}{key}: {value}\n" +DICT_NODE_TEMPLATE = "{tab}{key}:\n{value}\n" + + +def format_dict(dictionary, depth=0, width=4, templates=None): + r"""Render a dict on multiple lines + + Parameters + ---------- + dictionary: dict + The dictionary to render + depth: int + Tab added at the beginning of every line + width: int + Size of the tab added to each line, multiplied + by the depth of the object in the dict of dicts. + templates: dict + Templates for `empty_leaf`, `leaf` and `dict_node`. + Default is + `empty_leaf="{tab}{key}\n"` + `leaf="{tab}{key}: {value}\n"` + `dict_node="{tab}{key}:\n{value}\n"` + + Examples + -------- + >>> print(format_dict({1: {2: 3, 3: 4}, 2: {3: 4, 4: {5: 6}}})) + 1: + 2: 3 + 3: 4 + 2: + 3: 4 + 4: + 5: 6 + >>> templates = {'leaf': '{tab}{key}={value}\n', 'dict_node': '{tab}{key}:\n{value}\n'} + >>> print(format_dict({1: {2: 3, 3: 4}, 2: {3: 4, 4: {5: 6}}}, templates=templates)) + 1: + 2=3 + 3=4 + 2: + 3=4 + 4: + 5=6 + + """ + if isinstance(dictionary, (list, tuple)): + return format_list(dictionary, depth, width=width, templates=templates) + + # To avoid using mutable objects as default values in function signature. + if templates is None: + templates = dict() + + empty_leaf_template = templates.get('empty_leaf', DICT_EMPTY_LEAF_TEMPLATE) + leaf_template = templates.get('leaf', DICT_LEAF_TEMPLATE) + node_template = templates.get('dict_node', DICT_NODE_TEMPLATE) + + dict_string = "" + for key in sorted(dictionary.keys()): + tab = (" " * (depth * width)) + value = dictionary[key] + if isinstance(value, (dict, list, tuple)): + if not value: + dict_string += empty_leaf_template.format(tab=tab, key=key) + else: + subdict_string = format_dict( + value, depth + 1, width=width, templates=templates) + dict_string += node_template.format(tab=tab, key=key, value=subdict_string) + else: + dict_string += leaf_template.format(tab=tab, key=key, value=value) + + return dict_string.replace(' \n', '\n').rstrip("\n") + + +LIST_TEMPLATE = """\ +{tab}[ +{items} +{tab}]\ +""" +LIST_ITEM_TEMPLATE = "{tab}{item}\n" +LIST_NODE_TEMPLATE = "{item}\n" + + +def format_list(a_list, depth=0, width=4, templates=None): + r"""Render a list on multiple lines + + Parameters + ---------- + a_list: list + The list to render + depth: int + Tab added at the beginning of every line + width: int + Size of the tab added to each line, multiplied + by the depth of the object in the list of lists. + templates: dict + Templates for `list`, `item` and `list_node`. 
+ Default is + `list="{tab}[\n{items}\n{tab}]"` + `item="{tab}{item}\n"` + `list_node="{item}\n"` + + Examples + -------- + >>> print(format_list([1, [2, 3], 4, [5, 6, 7, 8]])) + [ + 1 + [ + 2 + 3 + ] + 4 + [ + 5 + 6 + 7 + 8 + ] + ] + >>> templates = {} + >>> templates['list'] = '{tab}\n{items}\n{tab}' + >>> templates['item'] = '{tab}- {item}\n' + >>> templates['list_node'] = '{tab}{item}\n' + >>> print(format_list([1, [2, 3], 4, [5, 6, 7, 8]], width=2, templates=templates)) + - 1 + + - 2 + - 3 + + - 4 + + - 5 + - 6 + - 7 + - 8 + + """ + # To avoid using mutable objects as default values in function signature. + if templates is None: + templates = dict() + + list_template = templates.get('list', LIST_TEMPLATE) + item_template = templates.get('item', LIST_ITEM_TEMPLATE) + node_template = templates.get('list_node', LIST_NODE_TEMPLATE) + + tab = (" " * (depth * width)) + list_string = "" + for i, item in enumerate(a_list, 1): + subtab = (" " * ((depth + 1) * width)) + if isinstance(item, (dict, list, tuple)): + item_string = format_dict(item, depth + 1, width=width, templates=templates) + list_string += node_template.format(tab=subtab, id=i, item=item_string) + else: + list_string += item_template.format(tab=subtab, id=i, item=item) + + return list_template.format(tab=tab, items=list_string.rstrip("\n")) + + +ID_TEMPLATE = """\ +{title} +name: {name} +version: {version} +user: {user} +""" + + +def format_identification(experiment): + """Render a string for identification section""" + identification_string = ID_TEMPLATE.format( + title=format_title("Identification"), + name=experiment.name, + version=experiment.version, + user=experiment.metadata['user']) + + return identification_string + + +COMMANDLINE_TEMPLATE = """\ +{title} +{commandline} +""" + + +def format_commandline(experiment): + """Render a string for commandline section""" + if 'user_args' not in experiment.metadata: + return '' + + commandline_string = COMMANDLINE_TEMPLATE.format( + title=format_title("Commandline"), + commandline=" ".join(experiment.metadata['user_args'])) + + return commandline_string + + +CONFIG_TEMPLATE = """\ +{title} +pool size: {experiment.pool_size} +max trials: {experiment.max_trials} +""" + + +def format_config(experiment): + """Render a string for config section""" + config_string = CONFIG_TEMPLATE.format( + title=format_title("Config"), + experiment=experiment) + + return config_string + + +ALGORITHM_TEMPLATE = """\ +{title} +{configuration} +""" + + +def format_algorithm(experiment): + """Render a string for algorithm section""" + algorithm_string = ALGORITHM_TEMPLATE.format( + title=format_title("Algorithm"), + configuration=format_dict(experiment.configuration['algorithms'])) + + return algorithm_string + + +SPACE_TEMPLATE = """\ +{title} +{params} +""" + + +def format_space(experiment): + """Render a string for space section""" + space_string = SPACE_TEMPLATE.format( + title=format_title("Space"), + params="\n".join(name + ": " + experiment.space[name].get_prior_string() + for name in experiment.space.keys())) + + return space_string + + +METADATA_TEMPLATE = """\ +{title} +user: {experiment.metadata[user]} +datetime: {experiment.metadata[datetime]} +orion version: {experiment.metadata[orion_version]} +VCS: +{vcs} +""" + + +def format_metadata(experiment): + """Render a string for metadata section""" + metadata_string = METADATA_TEMPLATE.format( + title=format_title("Meta-data"), + experiment=experiment, + vcs=format_dict(experiment.metadata.get('VCS', {}), depth=1, width=2)) + + return 
metadata_string + + +REFERS_TEMPLATE = """\ +{title} +root: {root} +parent: {parent} +adapter: {adapter} +""" + + +def format_refers(experiment): + """Render a string for refers section""" + if experiment.node.root is experiment.node: + root = '' + parent = '' + adapter = '' + else: + root = experiment.node.root.name + parent = experiment.node.parent.name + adapter = "\n" + format_dict(experiment.refers['adapter'].configuration, depth=1, width=2) + + refers_string = REFERS_TEMPLATE.format( + title=format_title("Parent experiment"), + root=root, + parent=parent, + adapter=adapter) + + return refers_string + + +STATS_TEMPLATE = """\ +{title} +trials completed: {stats[trials_completed]} +best trial: + id: {stats[best_trials_id]} + evaluation: {stats[best_evaluation]} + params: +{best_params} +start time: {stats[start_time]} +finish time: {stats[finish_time]} +duration: {stats[duration]} +""" + + +NO_STATS_TEMPLATE = """\ +{title} +No trials executed... +""" + + +def format_stats(experiment): + """Render a string for stats section + + Parameters + ---------- + experiment: `orion.core.worker.experiment.Experiment` + + """ + stats = experiment.stats + if not stats: + return NO_STATS_TEMPLATE.format( + title=format_title("Stats")) + + best_params = get_trial_params(stats['best_trials_id'], experiment) + + stats_string = STATS_TEMPLATE.format( + title=format_title("Stats"), + stats=stats, + best_params=format_dict(best_params, depth=2, width=2)) + + return stats_string + + +def get_trial_params(trial_id, experiment): + """Get params from trial_id in given experiment""" + best_trial = experiment.get_trial(uid=trial_id) + if not best_trial: + return {} + + return best_trial.params diff --git a/src/orion/core/utils/format_trials.py b/src/orion/core/utils/format_trials.py index cbccafd91..381e41cdd 100644 --- a/src/orion/core/utils/format_trials.py +++ b/src/orion/core/utils/format_trials.py @@ -10,6 +10,7 @@ """ +from orion.core.utils.flatten import flatten from orion.core.worker.trial import Trial @@ -19,7 +20,7 @@ def trial_to_tuple(trial, space): The order within the tuple is dictated by the defined `orion.algo.space.Space` object. """ - params = {param.name: param.value for param in trial.params} + params = flatten(trial.params) trial_keys = set(params.keys()) space_keys = set(space.keys()) if trial_keys != space_keys: @@ -27,10 +28,39 @@ The trial {} has wrong params: Trial params: {} Space dims: {}""".format(trial.id, sorted(trial_keys), sorted(space_keys))) - return tuple(params[name] for name in space.keys()) +def dict_to_trial(data, space): + """Create a `orion.core.worker.trial.Trial` object from `data`, + filling only parameter information from `data`. + + :param data: A dict representing a sample point from `space`. + :param space: Definition of problem's domain. 
+ :type space: `orion.algo.space.Space` + """ + data = flatten(data) + params = [] + for name, dim in space.items(): + if name not in data and dim.default_value is dim.NO_DEFAULT_VALUE: + raise ValueError( + 'Dimension {} not specified and does not have a default value.'.format(name)) + value = data.get(name, dim.default_value) + + if value not in dim: + error_msg = "Dimension {} value {} is outside of prior {}".format( + name, value, dim.get_prior_string()) + raise ValueError(error_msg) + + params.append(dict( + name=dim.name, + type=dim.type, + value=value + )) + assert len(params) == len(space) + return Trial(params=params) + + def tuple_to_trial(data, space): """Create a `orion.core.worker.trial.Trial` object from `data`, filling only parameter information from `data`. diff --git a/src/orion/core/utils/tests.py b/src/orion/core/utils/tests.py index 44a498126..18965f2db 100644 --- a/src/orion/core/utils/tests.py +++ b/src/orion/core/utils/tests.py @@ -18,11 +18,12 @@ from orion.core.io.database.ephemeraldb import EphemeralDB from orion.core.io.database.mongodb import MongoDB from orion.core.io.database.pickleddb import PickledDB +import orion.core.io.experiment_builder as experiment_builder from orion.core.utils import SingletonAlreadyInstantiatedError -from orion.core.worker.experiment import Experiment from orion.core.worker.trial import Trial from orion.storage.base import get_storage, Storage from orion.storage.legacy import Legacy +from orion.storage.track import Track def _select(lhs, rhs): @@ -45,19 +46,45 @@ def utcnow(cls): return default_datetime() -def _get_default_test_database(): - """Return default configuration for the test database""" - _, filename = tempfile.mkstemp('orion_test') - +def _get_default_test_storage(): - """Return default configuration for the test storage""" wait
@@ -66,23 +93,20 @@ class OrionState: Parameters ---------- - config: YAML - YAML config to apply for this test - - experiments: list + experiments: list, optional List of experiments to insert into the database - - trials: list + trials: list, optional List of trials to insert into the database - - workers: list + workers: list, optional List of workers to insert into the database - - resources: list + lies: list, optional + List of lies to insert into the database + resources: list, optional List of resources to insert into the database - - database: dict - Configuration of the underlying database + from_yaml: YAML, optional + YAML config to apply for this test + storage: dict, optional + Configuration of the underlying storage backend Examples -------- @@ -93,41 +117,44 @@ class OrionState: """ # TODO: Fix these singletons to remove Legacy, MongoDB, PickledDB and EphemeralDB. - SINGLETONS = (Storage, Legacy, Database, MongoDB, PickledDB, EphemeralDB) singletons = {} - database = None experiments = [] trials = [] resources = [] workers = [] def __init__(self, experiments=None, trials=None, workers=None, lies=None, resources=None, - from_yaml=None, database=None): + from_yaml=None, storage=None): if from_yaml is not None: with open(from_yaml) as f: exp_config = list(yaml.safe_load_all(f)) experiments = exp_config[0] trials = exp_config[1] - self.database_config = _select(database, _get_default_test_database()) - self.experiments = _select(experiments, []) - self.trials = _select(trials, []) - self.workers = _select(workers, []) - self.resources = _select(resources, []) - self.lies = _select(lies, []) + self.tempfile = None + self.storage_config = _select(storage, _get_default_test_storage()) + + self._experiments = _select(experiments, []) + self._trials = _select(trials, []) + self._workers = _select(workers, []) + self._resources = _select(resources, []) + self._lies = _select(lies, []) + + # In the case of Track we also store the inserted objects + # so the user can compare the different values in tests + self.trials = [] + self.experiments = self._experiments + self.lies = [] - def init(self): + def init(self, config): """Initialize environment before testing""" - self.storage() - self.database = get_storage()._db - self.cleanup() + self.storage(config) self.load_experience_configuration() return self - def get_experiment(self, name, user=None, version=None): + def get_experiment(self, name, version=None): """Make experiment id deterministic""" - exp = Experiment(name, user=user, version=version) - exp._id = name + exp = experiment_builder.build(name=name, version=version) return exp def get_trial(self, index): @@ -136,77 +163,159 @@ def cleanup(self): """Cleanup after testing""" - self.database.remove('experiments', {}) - self.database.remove('trials', {}) + _remove(self.tempfile) + + def _set_tables(self): + self.trials = [] + self.lies = [] + + for exp in self._experiments: + get_storage().create_experiment(exp) + + for trial in self._trials: + nt = get_storage().register_trial(Trial(**trial)) + self.trials.append(nt.to_dict()) + + for lie in self._lies: + nt = get_storage().register_lie(Trial(**lie)) + self.lies.append(nt.to_dict()) def load_experience_configuration(self): """Load an example database.""" - for i, t_dict in enumerate(self.trials): - self.trials[i] = Trial(**t_dict).to_dict() - - for i, t_dict in enumerate(self.lies): - self.lies[i] = Trial(**t_dict).to_dict() - - self.trials.sort(key=lambda obj: int(obj['_id'], 16), reverse=True) - - for i, _ 
in enumerate(self.experiments): - path = os.path.join( - os.path.dirname(__file__), - self.experiments[i]["metadata"]["user_script"]) - - self.experiments[i]["metadata"]["user_script"] = path - self.experiments[i]['version'] = 1 - self.experiments[i]['_id'] = i - - if self.experiments: - self.database.write('experiments', self.experiments) - if self.trials: - self.database.write('trials', self.trials) - if self.workers: - self.database.write('workers', self.workers) - if self.resources: - self.database.write('resources', self.resources) - if self.lies: - self.database.write('lying_trials', self.lies) + for i, t_dict in enumerate(self._trials): + self._trials[i] = Trial(**t_dict).to_dict() + + for i, t_dict in enumerate(self._lies): + self._lies[i] = Trial(**t_dict).to_dict() + + self._trials.sort(key=lambda obj: int(obj['_id'], 16), reverse=True) + + for i, experiment in enumerate(self._experiments): + if 'user_script' in experiment['metadata']: + path = os.path.join( + os.path.dirname(__file__), + experiment["metadata"]["user_script"]) + experiment["metadata"]["user_script"] = path + + experiment['_id'] = i + + self._set_tables() + + def make_config(self): + """Iterate over the database configuration and replace ${file} + by the name of a temporary file + """ + _, self.tempfile = tempfile.mkstemp('_orion_test') + _remove(self.tempfile) + + def map_dict(fun, dictionary): + """Return a dictionary with fun applied to each value""" + return {k: fun(v) for k, v in dictionary.items()} + + def replace_file(v): + """Replace `${file}` by a generated temporary file""" + if isinstance(v, str): + v = v.replace('${file}', self.tempfile) + + if isinstance(v, dict): + v = map_dict(replace_file, v) + + return v + + return map_dict(replace_file, self.storage_config) def __enter__(self): """Load a new database state""" - for singleton in self.SINGLETONS: - self.new_singleton(singleton, new_value=None) - - return self.init() + self.singletons = update_singletons() + self.cleanup() + return self.init(self.make_config()) def __exit__(self, exc_type, exc_val, exc_tb): """Cleanup database state""" self.cleanup() - for obj in self.singletons: - self.restore_singleton(obj) - - def new_singleton(self, obj, new_value=None): - """Replace a singleton by another value""" - self.singletons[obj] = obj.instance - obj.instance = new_value - - def restore_singleton(self, obj): - """Restore a singleton to its previous value""" - obj.instance = self.singletons.get(obj) + update_singletons(self.singletons) - def storage(self): + def storage(self, config=None): """Return test storage""" - try: - storage_type = self.database_config.pop('storage_type', 'legacy') - config = { - 'database': self.database_config - } - db = Storage(of_type=storage_type, config=config) - self.database_config['storage_type'] = storage_type + if config is None: + return get_storage() + try: + config['of_type'] = config.pop('type') + db = Storage(**config) + self.storage_config = config except SingletonAlreadyInstantiatedError: db = get_storage() except KeyError: - print(self.database_config) + print(self.storage_config) raise return db + + +class LegacyOrionState(BaseOrionState): + """See :func:`~orion.utils.tests.BaseOrionState`""" + + def __init__(self, *args, **kwargs): + super(LegacyOrionState, self).__init__(*args, **kwargs) + self.initialized = False + + @property + def database(self): + """Retrieve legacy database handle""" + return get_storage()._db + + def init(self, config): + """Initialize environment before testing""" + 
self.storage(config) + self.initialized = True + + if hasattr(get_storage(), '_db'): + self.database.remove('experiments', {}) + self.database.remove('trials', {}) + + self.load_experience_configuration() + return self + + def get_experiment(self, name, version=None): + """Make experiment id deterministic""" + exp = experiment_builder.build(name, version=version) + exp._id = exp.name + return exp + + def _set_tables(self): + if self._experiments: + self.database.write('experiments', self._experiments) + if self._trials: + self.database.write('trials', self._trials) + if self._workers: + self.database.write('workers', self._workers) + if self._resources: + self.database.write('resources', self._resources) + if self._lies: + self.database.write('lying_trials', self._lies) + + self.lies = self._lies + self.trials = self._trials + + def cleanup(self): + """Cleanup after testing""" + if self.initialized: + self.database.remove('experiments', {}) + self.database.remove('trials', {}) + _remove(self.tempfile) + self.initialized = False + + +# We are faking a class constructor here +# pylint: disable=C0103 +def OrionState(*args, **kwargs): + """Build an orion state based on the storage type""" + storage = kwargs.get('storage') + + if not storage or storage['type'] == 'legacy': + return LegacyOrionState(*args, **kwargs) + + return BaseOrionState(*args, **kwargs) diff --git a/src/orion/core/worker/__init__.py b/src/orion/core/worker/__init__.py index b9f5b4a5d..2d9de5c98 100644 --- a/src/orion/core/worker/__init__.py +++ b/src/orion/core/worker/__init__.py @@ -9,23 +9,28 @@ with parameter values suggested. """ -import io import itertools import logging -import pprint +from orion.core.utils.exceptions import WaitingForTrials +from orion.core.utils.format_terminal import format_stats from orion.core.worker.consumer import Consumer from orion.core.worker.producer import Producer -from orion.storage.base import get_storage + log = logging.getLogger(__name__) -def reserve_trial(experiment, producer): +def reserve_trial(experiment, producer, _depth=1): """Reserve a new trial, or produce and reserve a trial if none are available.""" - trial = experiment.reserve_trial(score_handle=producer.algorithm.score) + trial = experiment.reserve_trial() if trial is None and not experiment.is_done: + + if _depth > 10: + raise WaitingForTrials('No trials are available at the moment. ' + 'Wait for current trials to finish.') + log.debug("#### Failed to pull a new trial from database.") log.debug("#### Fetch most recent completed trials and update algorithm.") @@ -34,6 +39,8 @@ log.debug("#### Produce new trials.") producer.produce() - return reserve_trial(experiment, producer) + return reserve_trial(experiment, producer, _depth=_depth + 1) return trial -def workon(experiment, worker_trials=None): +COMPLETION_MESSAGE = """\ +Hints +===== + +Info +---- + +To get more information on the experiment, run the command + +orion info --name {experiment.name} --version {experiment.version} + +""" + + +NONCOMPLETED_MESSAGE = """\ +Status +------ + +To get the status of the trials, run the command + +orion status --name {experiment.name} --version {experiment.version} + + +For a detailed view with status of each trial listed, use the argument `--all` + +orion status --name {experiment.name} --version {experiment.version} --all + +""" + + +def workon(experiment, max_trials=None, max_broken=None, max_idle_time=None, heartbeat=None, + user_script_config=None, interrupt_signal_code=None): 
"""Try to find solution to the search problem defined in `experiment`.""" - producer = Producer(experiment) - consumer = Consumer(experiment) + producer = Producer(experiment, max_idle_time) + consumer = Consumer(experiment, heartbeat, user_script_config, interrupt_signal_code) log.debug("##### Init Experiment #####") try: - iterator = range(int(worker_trials)) + iterator = range(int(max_trials)) except (OverflowError, TypeError): # When worker_trials is inf iterator = itertools.count() + worker_broken_trials = 0 for _ in iterator: log.debug("#### Poll for experiment termination.") if experiment.is_broken: - log.info("#### Experiment has reached broken trials threshold, terminating.") - return + print("#### Experiment has reached broken trials threshold, terminating.") + break if experiment.is_done: + print("##### Search finished successfully #####") break log.debug("#### Try to reserve a new trial to evaluate.") - trial = reserve_trial(experiment, producer) + try: + trial = reserve_trial(experiment, producer) + except WaitingForTrials as ex: + print("### Experiment failed to reserve new trials: {reason}, terminating. " + .format(reason=str(ex))) + break if trial is not None: log.debug("#### Successfully reserved %s to evaluate. Consuming...", trial) - consumer.consume(trial) - - stats = experiment.stats + success = consumer.consume(trial) + if not success: + worker_broken_trials += 1 - if not stats: - log.info("No trials completed.") - return - - best = get_storage().get_trial(uid=stats['best_trials_id']) - - stats_stream = io.StringIO() - pprint.pprint(stats, stream=stats_stream) - stats_string = stats_stream.getvalue() + if worker_broken_trials >= max_broken: + print("#### Worker has reached broken trials threshold, terminating.") + print(worker_broken_trials, max_broken) + break - best_stream = io.StringIO() - pprint.pprint(best.to_dict()['params'], stream=best_stream) - best_string = best_stream.getvalue() + print('\n' + format_stats(experiment)) - log.info("##### Search finished successfully #####") - log.info("\nRESULTS\n=======\n%s\n", stats_string) - log.info("\nBEST PARAMETERS\n===============\n%s", best_string) + print('\n' + COMPLETION_MESSAGE.format(experiment=experiment)) + if not experiment.is_done: + print(NONCOMPLETED_MESSAGE.format(experiment=experiment)) diff --git a/src/orion/core/worker/consumer.py b/src/orion/core/worker/consumer.py index 30020526f..c01c9318d 100644 --- a/src/orion/core/worker/consumer.py +++ b/src/orion/core/worker/consumer.py @@ -45,14 +45,31 @@ class Consumer(object): has been defined in a special orion environmental variable which is set into the child process' environment. - """ + Parameters + ---------- + experiment: `orion.core.worker.experiment.Experiment` + Manager of this experiment, provides convenient interface for interacting with + the database. + + heartbeat: int, optional + Frequency (seconds) at which the heartbeat of the trial is updated. + If the heartbeat of a `reserved` trial is larger than twice the configured + heartbeat, Oríon will reset the status of the trial to `interrupted`. + This allows restoring lost trials (ex: due to killed worker). + Defaults to `orion.core.config.worker.heartbeat`. + + user_script_config: str, optional + Config argument name of user's script (--config). + Defaults to `orion.core.config.worker.user_script_config`. + + interrupt_signal_code: int, optional + Signal returned by user script to signal to Oríon that it was interrupted. + Defaults to `orion.core.config.worker.interrupt_signal_code`. 
- def __init__(self, experiment): - """Initialize a consumer. + """ - :param experiment: Manager of this experiment, provides convenient - interface for interacting with the database. - """ + def __init__(self, experiment, heartbeat=None, user_script_config=None, + interrupt_signal_code=None): log.debug("Creating Consumer object.") self.experiment = experiment self.space = experiment.space @@ -60,8 +77,20 @@ def __init__(self, experiment): raise RuntimeError("Experiment object provided to Consumer has not yet completed" " initialization.") + if heartbeat is None: + heartbeat = orion.core.config.worker.heartbeat + + if user_script_config is None: + user_script_config = orion.core.config.worker.user_script_config + + if interrupt_signal_code is None: + interrupt_signal_code = orion.core.config.worker.interrupt_signal_code + + self.heartbeat = heartbeat + self.interrupt_signal_code = interrupt_signal_code + # Fetch space builder - self.template_builder = OrionCmdlineParser(orion.core.config.user_script_config) + self.template_builder = OrionCmdlineParser(user_script_config) self.template_builder.set_state_dict(experiment.metadata['parser']) # Get path to user's script and infer trial configuration directory if experiment.working_dir: @@ -69,19 +98,25 @@ else: self.working_dir = os.path.join(tempfile.gettempdir(), 'orion') - self.script_path = experiment.metadata['user_script'] - self.pacemaker = None def consume(self, trial): """Execute user's script as a black box using the options contained within `trial`. - :type trial: `orion.core.worker.trial.Trial` + Parameters + ---------- + trial: `orion.core.worker.trial.Trial` + Orion trial to execute. + + Returns + ------- + bool + True if the trial was successfully executed. False if the trial is broken. """ log.debug("### Create new directory at '%s':", self.working_dir) - temp_dir = self.experiment.working_dir is None + temp_dir = not bool(self.experiment.working_dir) prefix = self.experiment.name + "_" suffix = trial.id @@ -96,6 +131,8 @@ log.debug("## Parse results from file and fill corresponding Trial object.") self.experiment.update_completed_trial(trial, results_file) + success = True + except KeyboardInterrupt: log.debug("### Save %s as interrupted.", trial) self.experiment.set_trial_status(trial, status='interrupted') @@ -105,6 +142,10 @@ log.debug("### Save %s as broken.", trial) self.experiment.set_trial_status(trial, status='broken') + success = False + + return success + def get_execution_environment(self, trial, results_file='results.log'): """Set a few environment variables to allow users and underlying processes to know if they are running under orion. 
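The boolean now returned by `Consumer.consume` feeds the worker-side accounting added in the `workon` hunk above. A simplified, self-contained sketch of that loop (names shortened, the consumer faked; only the counting logic mirrors the diff):

```python
def run_worker(trials, consume, max_broken=3):
    """Stop once `max_broken` trials have come back as broken."""
    broken = 0
    for trial in trials:
        if not consume(trial):  # False means the trial ended up `broken`
            broken += 1
        if broken >= max_broken:
            print('#### Worker has reached broken trials threshold, terminating.')
            break


# Fake consumer failing on the 2nd and 4th trials: the loop stops after the 4th.
outcomes = iter([True, False, True, False, True])
run_worker(range(5), lambda trial: next(outcomes), max_broken=2)
```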
@@ -113,6 +154,7 @@ def get_execution_environment(self, trial, results_file='results.log'): ---------- results_file: str file used to store results, this is only used by the legacy protocol + trial: Trial reference to the trial object that is going to be run @@ -147,7 +189,6 @@ def get_execution_environment(self, trial, results_file='results.log'): """ env = dict(os.environ) - env['ORION_EXPERIMENT_ID'] = str(self.experiment.id) env['ORION_EXPERIMENT_NAME'] = str(self.experiment.name) env['ORION_EXPERIMENT_VERSION'] = str(self.experiment.version) @@ -155,6 +196,7 @@ def get_execution_environment(self, trial, results_file='results.log'): env['ORION_WORKING_DIR'] = str(trial.working_dir) env['ORION_RESULTS_PATH'] = str(results_file) + env['ORION_INTERRUPT_CODE'] = str(self.interrupt_signal_code) return env @@ -176,7 +218,7 @@ def _consume(self, trial, workdirname): log.debug("## Launch user's script as a subprocess and wait for finish.") - self.pacemaker = TrialPacemaker(trial) + self.pacemaker = TrialPacemaker(trial, self.heartbeat) self.pacemaker.start() try: self.execute_process(cmd_args, env) @@ -186,14 +228,18 @@ def _consume(self, trial, workdirname): return results_file + # pylint: disable = no-self-use def execute_process(self, cmd_args, environ): """Facilitate launching a black-box trial.""" - command = [self.script_path] + cmd_args + command = cmd_args signal.signal(signal.SIGTERM, _handler) process = subprocess.Popen(command, env=environ) return_code = process.wait() - if return_code != 0: + + if return_code == self.interrupt_signal_code: + raise KeyboardInterrupt() + elif return_code != 0: raise ExecutionError("Something went wrong. Check logs. Process " "returned with code {} !".format(return_code)) diff --git a/src/orion/core/worker/experiment.py b/src/orion/core/worker/experiment.py index c0ec56fef..2ce7e35ad 100644 --- a/src/orion/core/worker/experiment.py +++ b/src/orion/core/worker/experiment.py @@ -11,24 +11,12 @@ """ import copy import datetime -import getpass import logging -import sys import orion.core -from orion.core.cli.evc import fetch_branching_configuration -from orion.core.evc.adapters import Adapter, BaseAdapter -from orion.core.evc.conflicts import detect_conflicts, ExperimentNameConflict -from orion.core.io.database import DuplicateKeyError -from orion.core.io.experiment_branch_builder import ExperimentBranchBuilder -from orion.core.io.interactive_commands.branching_prompt import BranchingPrompt -from orion.core.io.space_builder import SpaceBuilder -import orion.core.utils.backward as backward -from orion.core.utils.exceptions import RaceCondition -from orion.core.worker.primary_algo import PrimaryAlgo -from orion.core.worker.strategy import (BaseParallelStrategy, - Strategy) -from orion.storage.base import get_storage, ReadOnlyStorageProtocol +from orion.core.evc.adapters import BaseAdapter +from orion.core.evc.experiment import ExperimentNode +from orion.storage.base import FailedUpdate, get_storage, ReadOnlyStorageProtocol log = logging.getLogger(__name__) @@ -60,7 +48,9 @@ class Experiment: This attribute can be updated if the rest of the experiment configuration is the same. In that case, if trying to set to an already set experiment, it will overwrite the previous one. - algorithms : dict of dicts or an `PrimaryAlgo` object, after initialization is done. + space: Space + Object representing the optimization space. + algorithms : `PrimaryAlgo` object. Complete specification of the optimization and dynamical procedures taking place in this `Experiment`. 
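Taken together, the environment hunk above means a user script can discover its execution context entirely from `ORION_*` variables. A hedged example, assuming the script is launched by an Oríon worker with the package installed; `report_objective` is the helper referenced by the `MISSING_RESULT_FILE` message earlier in this diff:

```python
import os

from orion.client.cli import report_objective

# All of these variables are injected by `get_execution_environment` above.
experiment = os.environ.get('ORION_EXPERIMENT_NAME')
version = os.environ.get('ORION_EXPERIMENT_VERSION')
working_dir = os.environ.get('ORION_WORKING_DIR')
print('running under experiment', experiment, 'version', version, 'in', working_dir)

loss = 0.123  # placeholder for the objective computed by the user's code
report_objective(loss)  # writes to $ORION_RESULTS_PATH for the worker to parse
```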
@@ -88,76 +78,27 @@ class Experiment: """ __slots__ = ('name', 'refers', 'metadata', 'pool_size', 'max_trials', 'version', - 'algorithms', 'producer', 'working_dir', '_init_done', '_id', + 'space', 'algorithms', 'producer', 'working_dir', '_id', '_node', '_storage') non_branching_attrs = ('pool_size', 'max_trials') - def __init__(self, name, user=None, version=None): - """Initialize an Experiment object with primary key (:attr:`name`, :attr:`user`). - - Try to find an entry in `Database` with such a key and config this object - from it import, if successful. Else, init with default/empty values and - insert new entry with this object's attributes in database. - - .. note:: - Practically initialization has not finished until `config`'s setter - is called. - - :param name: Describe a configuration with a unique identifier per :attr:`user`. - :type name: str - """ - log.debug("Creating Experiment object with name: %s", name) - self._init_done = False - + def __init__(self, name, version=None): self._id = None self.name = name + self.version = version if version else 1 self._node = None self.refers = {} - if user is None: - user = getpass.getuser() - self.metadata = {'user': user} + self.metadata = {} self.pool_size = None self.max_trials = None + self.space = None self.algorithms = None self.working_dir = None - self.producer = {'strategy': None} - self.version = 1 + self.producer = {} # this needs to be an attribute because we override it in ExperimentView self._storage = get_storage() - config = self._storage.fetch_experiments({'name': name}) - - if config: - log.debug("Found existing experiment, %s, under user, %s, registered in database.", - name, user) - - if len(config) > 1: - max_version = max(config, key=lambda exp: exp.get('version', 1)).get('version', 1) - - if version is None: - self.version = max_version - else: - self.version = version - - if self.version > max_version: - log.warning("Version %s was specified but most recent version is only %s. " - "Using %s.", self.version, max_version, max_version) - - self.version = min(self.version, max_version) - - log.info("Many versions for experiment %s have been found. Using latest " - "version %s.", name, self.version) - config = filter(lambda exp: exp.get('version', 1) == self.version, config) - - config = sorted(config, key=lambda x: x['metadata']['datetime'], - reverse=True)[0] - - backward.populate_priors(config['metadata']) - - for attrname in self.__slots__: - if not attrname.startswith('_') and attrname in config: - setattr(self, attrname, config[attrname]) - self._id = config['_id'] + self._node = ExperimentNode(self.name, self.version, experiment=self) def fetch_trials(self, with_evc_tree=False): """Fetch all trials of the experiment""" @@ -167,18 +108,6 @@ def get_trial(self, trial=None, uid=None): """Fetch a single Trial, see `orion.storage.base.BaseStorage.get_trial`""" return self._storage.get_trial(trial, uid) - def connect_to_version_control_tree(self, node): - """Connect the experiment to its node in a version control tree - - .. seealso:: - - :class:`orion.core.evc.experiment.ExperimentNode` - - :param node: Node giving access to the experiment version control tree. 
- :type name: None or `ExperimentNode` - """ - self._node = node - def retrieve_result(self, trial, *args, **kwargs): """See :func:`~orion.storage.BaseStorageProtocol.retrieve_result`""" return self._storage.retrieve_result(trial, *args, **kwargs) @@ -205,9 +134,6 @@ def reserve_trial(self, score_handle=None): """ log.debug('reserving trial with (score: %s)', score_handle) - if score_handle is not None: - log.warning("Argument `score_handle` is deprecated") - self.fix_lost_trials() selected_trial = self._storage.reserve_trial(self) @@ -228,10 +154,13 @@ def fix_lost_trials(self): for trial in trials: log.debug('Setting lost trial %s status to interrupted...', trial.id) - updated = self._storage.set_trial_status(trial, status='interrupted') - log.debug('success' if updated else 'failed') + try: + self._storage.set_trial_status(trial, status='interrupted') + log.debug('success') + except FailedUpdate: + log.debug('failed') - def update_completed_trial(self, trial, results_file): + def update_completed_trial(self, trial, results_file=None): """Inform database about an evaluated `trial` with results. :param trial: Corresponds to a successful evaluation of a particular run. @@ -274,7 +203,7 @@ def register_lie(self, lying_trial): lying_trial.end_time = datetime.datetime.utcnow() self._storage.register_lie(lying_trial) - def register_trial(self, trial): + def register_trial(self, trial, status='new'): """Register new trial in the database. Inform database about *new* suggested trial with specific parameter values. Trials may only @@ -295,7 +224,7 @@ """ stamp = datetime.datetime.utcnow() trial.experiment = self._id - trial.status = 'new' + trial.status = status trial.submit_time = stamp self._storage.register_trial(trial) @@ -313,7 +242,7 @@ def fetch_trials_by_status(self, status, with_evc_tree=False): :return: list of `Trial` objects """ - return self._select_evc_call(with_evc_tree, 'fetch_trial_by_status', status) + return self._select_evc_call(with_evc_tree, 'fetch_trials_by_status', status) def fetch_noncompleted_trials(self, with_evc_tree=False): """Fetch non-completed trials of this `Experiment` instance. @@ -356,17 +285,26 @@ def is_done(self): """Return True, if this experiment is considered to be finished. 1. Count how many trials have been completed and compare with `max_trials`. - 2. Ask `algorithms` if they consider there is a chance for further improvement. + 2. Ask `algorithms` if they consider there is a chance for further improvement, and + verify if there are any pending trials. .. note:: To be used as a terminating condition in a ``Worker``. """ - num_completed_trials = self._storage.count_completed_trials(self) + trials = self.fetch_trials(with_evc_tree=True) + num_completed_trials = 0 + num_pending_trials = 0 + for trial in trials: + if trial.status == 'completed': + num_completed_trials += 1 + elif trial.status in ['new', 'reserved', 'interrupted']: + num_pending_trials += 1 - return ((num_completed_trials >= self.max_trials) or - (self._init_done and self.algorithms.is_done)) + return ( + (num_completed_trials >= self.max_trials) or + (self.algorithms.is_done and num_pending_trials == 0)) @property def is_broken(self): @@ -380,16 +318,6 @@ num_broken_trials = self._storage.count_broken_trials(self) return num_broken_trials >= orion.core.config.worker.max_broken - @property - def space(self): - """Return problem's parameter `orion.algo.space.Space`. - - .. note:: It will return None, if experiment init is not done. 
- """ - if self._init_done: - return self.algorithms.space - return None - @property def configuration(self): """Return a copy of an `Experiment` configuration as a dictionary.""" @@ -397,23 +325,18 @@ def configuration(self): for attrname in self.__slots__: if attrname.startswith('_'): continue - attribute = getattr(self, attrname) - if self._init_done and attrname == 'algorithms': + attribute = copy.deepcopy(getattr(self, attrname)) + config[attrname] = attribute + if attrname in ['algorithms', 'space']: config[attrname] = attribute.configuration - else: - config[attrname] = attribute - - if attrname == "refers" and isinstance(attribute.get("adapter"), BaseAdapter): - config[attrname] = copy.deepcopy(config[attrname]) + elif attrname == "refers" and isinstance(attribute.get("adapter"), BaseAdapter): config[attrname]['adapter'] = config[attrname]['adapter'].configuration - - if self._init_done and attrname == "producer" and attribute.get("strategy"): - config[attrname] = copy.deepcopy(config[attrname]) + elif attrname == "producer" and attribute.get("strategy"): config[attrname]['strategy'] = config[attrname]['strategy'].configuration - # Reason for deepcopy is that some attributes are dictionaries - # themselves, we don't want to accidentally change the state of this - # object from a getter. + if self.id is not None: + config['_id'] = self.id + return copy.deepcopy(config) @property @@ -466,203 +389,6 @@ def stats(self): return stats - def configure(self, config, enable_branching=True, enable_update=True): - """Set `Experiment` by overwriting current attributes. - - If `Experiment` was already set and an overwrite is needed, a *branch* - is advised with a different :attr:`name` for this particular configuration. - - .. note:: - - Calling this property is necessary for an experiment's initialization process to be - considered as done. But it can be called only once. - - """ - log.debug('configuring (name: %s)', config['name']) - if self._init_done: - raise RuntimeError("Configuration is done; cannot reset an Experiment.") - - # Experiment was build using db, but config was build before experiment got in db. - # Fake a DuplicateKeyError to force reinstantiation of experiment with proper config. - if self._id is not None and "datetime" not in config['metadata']: - raise DuplicateKeyError("Cannot register an existing experiment with a new config") - - # Copy and simulate instantiating given configuration - experiment = Experiment(self.name, version=self.version) - experiment._instantiate_config(self.configuration) - experiment._instantiate_config(config) - experiment._init_done = True - - # If id is None in this object, then database did not hit a config - # with same (name, user's name) pair. Everything depends on the user's - # orion_config to set. - if self._id is None: - if config['name'] != self.name or \ - config['metadata']['user'] != self.metadata['user']: - raise ValueError("Configuration given is inconsistent with this Experiment.") - must_branch = True - else: - # Branch if it is needed - # TODO: When refactoring experiment managenent, is_different_from - # will be used when EVC is not available. 
- # must_branch = self._is_different_from(experiment.configuration) - branching_configuration = fetch_branching_configuration(config) - configuration = self.configuration - configuration['_id'] = self._id - conflicts = detect_conflicts(configuration, experiment.configuration) - must_branch = len(conflicts.get()) > 1 or branching_configuration.get('branch') - - name_conflict = conflicts.get([ExperimentNameConflict])[0] - if not name_conflict.is_resolved and not config.get('version'): - raise RaceCondition('There was likely a race condition during version increment.') - - elif must_branch and not enable_branching: - raise ValueError("Configuration is different and generate a branching event") - - elif must_branch: - experiment._branch_config(conflicts, branching_configuration) - - final_config = experiment.configuration - self._instantiate_config(final_config) - - self._init_done = True - - if not enable_update: - return - - # If everything is alright, push new config to database - if must_branch: - final_config['metadata']['datetime'] = datetime.datetime.utcnow() - self.metadata['datetime'] = final_config['metadata']['datetime'] - # This will raise DuplicateKeyError if a concurrent experiment with - # identical (name, metadata.user) is written first in the database. - self._storage.create_experiment(final_config) - - # XXX: Reminder for future DB implementations: - # MongoDB, updates an inserted dict with _id, so should you :P - self._id = final_config['_id'] - - # Update refers in db if experiment is root - if self.refers['parent_id'] is None: - log.debug('update refers (name: %s)', config['name']) - self.refers['root_id'] = self._id - self._storage.update_experiment(self, refers=self.configuration['refers']) - - else: - # Writing the final config to an already existing experiment raises - # a DuplicatKeyError because of the embedding id `metadata.user`. - # To avoid this `final_config["name"]` is popped out before - # `db.write()`, thus seamingly breaking the compound index - # `(name, metadata.user)` - log.debug('updating experiment (name: %s)', config['name']) - - final_config.pop("name") - self._storage.update_experiment(self, **final_config) - - def _instantiate_config(self, config): - """Check before dispatching experiment whether configuration corresponds - to a executable experiment environment. - - 1. Check `refers` and instantiate `Adapter` objects from it. - 2. Try to build parameter space from user arguments. - 3. Check whether configured algorithms correspond to [known]/valid - implementations of the ``Algorithm`` class. Instantiate these objects. - 4. Check if experiment `is_done`, prompt for larger `max_trials` if it is. (TODO) - - """ - # Just overwrite everything else given - for section, value in config.items(): - if section not in self.__slots__: - log.info("Found section '%s' in configuration. Experiments " - "do not support this option. Ignoring.", section) - continue - if section.startswith('_'): - log.info("Found section '%s' in configuration. " - "Cannot set private attributes. Ignoring.", section) - continue - - # Copy sub configuration to value confusing side-effects - # Only copy at this level, not `config` directly to avoid TypeErrors if config contains - # non-serializable objects (copy.deepcopy complains otherwise). - if isinstance(value, dict): - value = copy.deepcopy(value) - - setattr(self, section, value) - - # TODO: Can we get rid of this try-except clause? 
- try: - space_builder = SpaceBuilder() - space = space_builder.build(config['metadata']['priors']) - - if not space: - raise ValueError("Parameter space is empty. There is nothing to optimize.") - - # Instantiate algorithms - self.algorithms = PrimaryAlgo(space, self.algorithms) - except KeyError: - pass - - self.refers.setdefault('parent_id', None) - self.refers.setdefault('root_id', self._id) - self.refers.setdefault('adapter', []) - if not isinstance(self.refers.get('adapter'), BaseAdapter): - self.refers['adapter'] = Adapter.build(self.refers['adapter']) - - if not self.producer.get('strategy'): - self.producer = {'strategy': Strategy(of_type="MaxParallelStrategy")} - elif not isinstance(self.producer.get('strategy'), BaseParallelStrategy): - self.producer = {'strategy': Strategy(of_type=self.producer['strategy'])} - - def _branch_config(self, conflicts, branching_configuration): - """Ask for a different identifier for this experiment. Set :attr:`refers` - key to previous experiment's name, the one that we branched from. - - :param config: Conflicting configuration that will change based on prompt. - """ - experiment_brancher = ExperimentBranchBuilder(conflicts, branching_configuration) - - needs_manual_resolution = (not experiment_brancher.is_resolved or - experiment_brancher.manual_resolution) - - if needs_manual_resolution: - branching_prompt = BranchingPrompt(experiment_brancher) - - if not sys.__stdin__.isatty(): - raise ValueError( - "Configuration is different and generates a branching event:\n{}".format( - branching_prompt.get_status())) - - branching_prompt.cmdloop() - - if branching_prompt.abort or not experiment_brancher.is_resolved: - sys.exit() - - adapter = experiment_brancher.create_adapters() - self._instantiate_config(experiment_brancher.conflicting_config) - self.refers['adapter'] = adapter - self.refers['parent_id'] = self._id - - def _is_different_from(self, config): - """Return True, if current `Experiment`'s configuration as described by - its attributes is different from the one suggested in `config`. - """ - is_diff = False - for section, value in config.items(): - if section in self.non_branching_attrs or \ - section not in self.__slots__ or \ - section.startswith('_'): - continue - item = getattr(self, section) - if item != value: - log.warning("Config given is different from config found in db at section: %s", - section) - log.warning("Config+ :\n%s", value) - log.warning("Config- :\n%s", item) - is_diff = True - break - - return is_diff - def __repr__(self): """Represent the object as a string.""" return "Experiment(name=%s, metadata.user=%s, version=%s)" % \ @@ -683,53 +409,15 @@ class ExperimentView(object): # Attributes valid_attributes = (["_id", "name", "refers", "metadata", "pool_size", "max_trials", - "version"] + + "version", "space"] + # Properties - ["id", "node", "is_done", "space", "algorithms", "stats", "configuration"] + + ["id", "node", "is_done", "algorithms", "stats", "configuration"] + # Methods - ["fetch_trials", "fetch_trials_by_status", - "connect_to_version_control_tree", "get_trial"]) - - def __init__(self, name, user=None, version=None): - """Initialize viewed experiment object with primary key (:attr:`name`, :attr:`user`). - - Build an experiment from configuration found in `Database` with a key (name, user). + ["fetch_trials", "fetch_trials_by_status", "get_trial"]) - .. note:: - - A view is fully configured at initialiation. It cannot be reconfigured. 
- If no experiment is found for the key (name, user), a `ValueError` will be raised. - - :param name: Describe a configuration with a unique identifier per :attr:`user`. - :type name: str - """ - self._experiment = Experiment(name, user, version) - - if self._experiment.id is None: - raise ValueError("No experiment with given name '%s' for user '%s' inside database, " - "no view can be created." % - (self._experiment.name, self._experiment.metadata['user'])) - - # TODO: Views are not fully configured until configuration is refactored - # This snippet is to instantiate adapters anyhow, because it is required for - # experiment views in EVC. - self.refers.setdefault('parent_id', None) - self.refers.setdefault('root_id', self._id) - self.refers.setdefault('adapter', []) - if not isinstance(self.refers.get('adapter'), BaseAdapter): - self.refers['adapter'] = Adapter.build(self.refers['adapter']) - - # try: - # self._experiment.configure(self._experiment.configuration, enable_branching=False, - # enable_update=False) - # except ValueError as e: - # if "Configuration is different and generates a branching event" in str(e): - # raise RuntimeError( - # "Configuration in the database does not correspond to the one generated by " - # "Experiment object. This is likely due to a backward incompatible update in " - # "Oríon. Please report to https://github.com/epistimio/orion/issues.") from e - # raise - self._experiment._storage = ReadOnlyStorageProtocol(get_storage()) + def __init__(self, experiment): + self._experiment = experiment + self._experiment._storage = ReadOnlyStorageProtocol(experiment._storage) def __getattr__(self, name): """Get attribute only if valid""" diff --git a/src/orion/core/worker/primary_algo.py b/src/orion/core/worker/primary_algo.py index f11acbaba..faba161e3 100644 --- a/src/orion/core/worker/primary_algo.py +++ b/src/orion/core/worker/primary_algo.py @@ -8,7 +8,6 @@ :synopsis: Performs checks and organizes required transformations of points. """ - from orion.algo.base import BaseAlgorithm from orion.core.worker.transformer import build_required_space diff --git a/src/orion/core/worker/producer.py b/src/orion/core/worker/producer.py index 56c6ee8cc..4f4da09e8 100644 --- a/src/orion/core/worker/producer.py +++ b/src/orion/core/worker/producer.py @@ -16,6 +16,8 @@ import orion.core from orion.core.io.database import DuplicateKeyError from orion.core.utils import format_trials +from orion.core.utils.exceptions import SampleTimeout, WaitingForTrials +from orion.core.worker.trial import Trial from orion.core.worker.trials_history import TrialsHistory log = logging.getLogger(__name__) @@ -43,6 +45,7 @@ def __init__(self, experiment, max_idle_time=None): raise RuntimeError("Experiment object provided to Producer has not yet completed" " initialization.") self.algorithm = experiment.algorithms + self.algorithm.algorithm.max_trials = experiment.max_trials if max_idle_time is None: max_idle_time = orion.core.config.worker.max_idle_time self.max_idle_time = max_idle_time @@ -51,7 +54,9 @@ def __init__(self, experiment, max_idle_time=None): # TODO: Move trials_history into PrimaryAlgo during the refactoring of Algorithm with # Strategist and Scheduler. 
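The producer rework in this hunk deduplicates sampled points with a local set of parameter hashes, in addition to the database's uniqueness check. A rough standalone sketch of the idea (hypothetical helper names; the real hash is `Trial.compute_trial_hash` over the trial's param list):

    import hashlib


    def params_hash(params):
        """Hash a {name: value} dict; sorted so key order cannot matter."""
        serialized = ','.join('{}:{}'.format(name, params[name]) for name in sorted(params))
        return hashlib.md5(serialized.encode('utf-8')).hexdigest()


    seen_hashes = set()


    def register(params):
        """Register a point once; return 1 on success, 0 on duplicate."""
        key = params_hash(params)
        if key in seen_hashes:
            return 0  # duplicate: the caller backs off and updates the algorithm
        seen_hashes.add(key)
        return 1


    assert register({'/x': 1.0}) == 1
    assert register({'/x': 1.0}) == 0  # second registration is rejected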
self.trials_history = TrialsHistory() + self.params_hashes = set() self.naive_trials_history = None + self.failure_count = 0 @property def pool_size(self): @@ -60,51 +65,93 @@ def backoff(self): """Wait some time and update algorithm.""" - waiting_time = min(0, random.gauss(1, 0.2)) + waiting_time = max(0, random.gauss(1, 0.2)) log.info('Waiting %d seconds', waiting_time) time.sleep(waiting_time) log.info('Updating algorithm.') self.update() + self.failure_count += 1 + + def _sample_guard(self, start): + """Check that the time spent sampling is less than max_idle_time""" + if time.time() - start > self.max_idle_time: + raise SampleTimeout( + "Algorithm could not sample new points in less than {} seconds. " + "Failed to sample points {} times".format(self.max_idle_time, self.failure_count)) def produce(self): """Create and register new trials.""" sampled_points = 0 + # Reset the number of times we failed to sample points + self.failure_count = 0 start = time.time() - while sampled_points < self.pool_size and not self.algorithm.is_done: - if time.time() - start > self.max_idle_time: - raise RuntimeError( - "Algorithm could not sample new points in less than {} seconds".format( - self.max_idle_time)) - log.debug("### Algorithm suggests new points.") + while (sampled_points < self.pool_size and + not (self.experiment.is_done or self.naive_algorithm.is_done)): + self._sample_guard(start) + log.debug("### Algorithm suggests new points.") new_points = self.naive_algorithm.suggest(self.pool_size) + # Sync state of original algo so that state continues evolving. self.algorithm.set_state(self.naive_algorithm.state_dict) + if new_points is None: - log.info("### Algo opted out.") - self.backoff() - continue + if self.algorithm.is_done: + return + + raise WaitingForTrials('Algo does not have more trials to sample.'
+ ' Waiting for current trials to finish') for new_point in new_points: - log.debug("#### Convert point to `Trial` object.") - new_trial = format_trials.tuple_to_trial(new_point, self.space) - try: - new_trial.parents = self.naive_trials_history.children - log.debug("#### Register new trial to database: %s", new_trial) - self.experiment.register_trial(new_trial) - sampled_points += 1 - except DuplicateKeyError: - log.debug("#### Duplicate sample.") - self.backoff() - break + sampled_points += self.register_trials(new_point) + + def register_trials(self, new_point): + """Register a new set of sampled parameters into the DB, + guaranteeing their uniqueness + + Parameters + ---------- + new_point: tuple + tuple of values representing the hyperparameter values + + """ + # FIXME: Relying on DB to guarantee uniqueness + # once the trial history is held by the algo, this logic can move out of the DB + + log.debug("#### Convert point to `Trial` object.") + new_trial = format_trials.tuple_to_trial(new_point, self.space) + + try: + self._prevalidate_trial(new_trial) + new_trial.parents = self.naive_trials_history.children + log.debug("#### Register new trial to database: %s", new_trial) + self.experiment.register_trial(new_trial) + self._update_params_hashes([new_trial]) + return 1 + + except DuplicateKeyError: + log.debug("#### Duplicate sample.") + self.backoff() + return 0 + + def _prevalidate_trial(self, new_trial): + """Verify that the trial is not already in the parent history""" + if Trial.compute_trial_hash(new_trial, ignore_experiment=True) in self.params_hashes: + raise DuplicateKeyError + + def _update_params_hashes(self, trials): + """Register locally all param hashes of the given trials""" + for trial in trials: + self.params_hashes.add( + Trial.compute_trial_hash(trial, ignore_experiment=True, ignore_lie=True)) def update(self): """Pull all trials to update model with completed ones and naive model with non completed ones. """ - trials = self.experiment.fetch_trials() + trials = self.experiment.fetch_trials(with_evc_tree=True) self._update_algorithm([trial for trial in trials if trial.status == 'completed']) self._update_naive_algorithm([trial for trial in trials if trial.status != 'completed']) @@ -130,6 +177,7 @@ def _update_algorithm(self, completed_trials): self.trials_history.update(new_completed_trials) self.algorithm.observe(points, results) self.strategy.observe(points, results) + self._update_params_hashes(new_completed_trials) def _produce_lies(self, incomplete_trials): """Add fake objective results to incomplete trials @@ -172,3 +220,4 @@ def _update_naive_algorithm(self, incomplete_trials): log.debug("### Observe them.") self.naive_trials_history.update(lying_trials) self.naive_algorithm.observe(points, results) + self._update_params_hashes(lying_trials) diff --git a/src/orion/core/worker/strategy.py b/src/orion/core/worker/strategy.py index f66567c1b..82d9b7a33 100644 --- a/src/orion/core/worker/strategy.py +++ b/src/orion/core/worker/strategy.py @@ -17,6 +17,17 @@ log = logging.getLogger(__name__) +CORRUPTED_DB_WARNING = """\ +Trial `%s` has an objective but status is not completed. +This is likely due to a corrupted database, possibly because of +database timeouts. Try manually setting the status to `completed`. +You can find documentation to do this at +https://orion.readthedocs.io/en/stable/user/storage.html#storage-backend.
+ +If you encounter this issue often, please consider reporting it to +https://github.com/Epistimio/orion/issues.""" + + def get_objective(trial): """Get the value for the objective, if it exists, for this trial @@ -57,15 +68,33 @@ def observe(self, points, results): # converted to expect trials instead of lists and dictionaries. pass - @abstractmethod + # pylint: disable=no-self-use def lie(self, trial): """Construct a fake result for an incomplete trial - :param trial: `orion.core.worker.trial.Trial` - :return: Float or None - The fake objective result corresponding to the trial given + Parameters + ---------- + trial: `orion.core.worker.trial.Trial` + A trial object which is not supposed to be completed. + + Returns + ------- + ``orion.core.worker.trial.Trial.Result`` + The fake objective result corresponding to the trial given. + + Notes + ----- + If the trial has an objective even if not completed, a warning is printed to user + with a pointer to documentation to resolve the database corruption. The result returned is + the corresponding objective instead of the lie. + """ - pass + objective = get_objective(trial) + if objective: + log.warning(CORRUPTED_DB_WARNING, trial.id) + return Trial.Result(name='lie', type='lie', value=objective) + + return None @property def configuration(self): @@ -83,7 +112,11 @@ def observe(self, points, results): def lie(self, trial): """See BaseParallelStrategy.lie""" - pass + result = super(NoParallelStrategy, self).lie(trial) + if result: + return result + + return None class MaxParallelStrategy(BaseParallelStrategy): @@ -101,8 +134,9 @@ def observe(self, points, results): def lie(self, trial): """See BaseParallelStrategy.lie""" - if get_objective(trial): - raise RuntimeError("Trial {} is completed but should not be.".format(trial.id)) + result = super(MaxParallelStrategy, self).lie(trial) + if result: + return result return Trial.Result(name='lie', type='lie', value=self.max_result) @@ -123,8 +157,9 @@ def observe(self, points, results): def lie(self, trial): """See BaseParallelStrategy.lie""" - if get_objective(trial): - raise RuntimeError("Trial {} is completed but should not be.".format(trial.id)) + result = super(MeanParallelStrategy, self).lie(trial) + if result: + return result return Trial.Result(name='lie', type='lie', value=self.mean_result) @@ -142,8 +177,9 @@ def observe(self, points, results): def lie(self, trial): """See BaseParallelStrategy.lie""" - if get_objective(trial): - raise RuntimeError("Trial {} is completed but should not be.".format(trial.id)) + result = super(StubParallelStrategy, self).lie(trial) + if result: + return result return Trial.Result(name='lie', type='lie', value=self.stub_value) diff --git a/src/orion/core/worker/transformer.py b/src/orion/core/worker/transformer.py index 22a293890..75fa7fd48 100644 --- a/src/orion/core/worker/transformer.py +++ b/src/orion/core/worker/transformer.py @@ -18,6 +18,7 @@ from orion.algo.space import (Dimension, Space) +# pylint: disable=too-many-branches def build_required_space(requirements, original_space): """Build a `Space` object which agrees to the `requirements` imposed by the desired optimization algorithm. 
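The strategy refactor above turns `lie()` into a template method: the base class handles the corrupted-trial case (an objective on a non-completed trial) and each subclass only supplies its fake value. A condensed sketch of the pattern with stand-in classes (not the real `BaseParallelStrategy` API):

    import collections

    FakeTrial = collections.namedtuple('FakeTrial', 'objective')


    class BaseStrategy:
        """Base lie(): reuse the real objective when the trial already has one."""

        def lie(self, trial):
            if trial.objective is not None:
                # Corrupted-database case: warn and return the true objective.
                return trial.objective
            return None


    class MaxStrategy(BaseStrategy):
        """Fabricate the worst objective seen so far for incomplete trials."""

        max_result = 100.0

        def lie(self, trial):
            result = super().lie(trial)
            if result is not None:
                return result
            return self.max_result


    strategy = MaxStrategy()
    assert strategy.lie(FakeTrial(objective=None)) == 100.0  # fabricated lie
    assert strategy.lie(FakeTrial(objective=3.2)) == 3.2     # corrupted: reuse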
@@ -43,13 +44,18 @@ def build_required_space(requirements, original_space): """ requirements = requirements if isinstance(requirements, list) else [requirements] + if not requirements: + requirements = [None] + space = TransformedSpace() for dim in original_space.values(): transformers = [] type_ = dim.type base_domain_type = type_ for requirement in requirements: - if type_ == 'real' and requirement in ('real', None): + if type_ == 'real' and requirement in ('real', None) and dim.precision is not None: + transformers.append(Precision(dim.precision)) + elif type_ == 'real' and requirement in ('real', None): pass elif type_ == 'real' and requirement == 'integer': transformers.append(Quantize()) @@ -64,9 +70,11 @@ def build_required_space(requirements, original_space): transformers.append(Enumerate(dim.categories)) elif type_ == 'categorical' and requirement is None: pass + elif type_ == 'fidelity' and requirement is None: + pass else: raise TypeError("Unsupported dimension type ('{}') " - "or requirement ('{}')".format(requirement, type_)) + "or requirement ('{}')".format(type_, requirement)) try: last_type = transformers[-1].target_type type_ = last_type if last_type != 'invariant' else type_ @@ -124,7 +132,6 @@ class Identity(Transformer): """Implement an identity transformation. Everything as it is.""" def __init__(self, domain_type=None): - """Initialize an identity transformation. Domain type is equal to target type.""" self._domain_type = domain_type def transform(self, point): @@ -151,11 +158,10 @@ def target_type(self): class Compose(Transformer): - """Implement a composite transformation.""" + """Initialize composite transformer with a list of `Transformer` objects + and domain type on which it will be applied.""" def __init__(self, transformers, base_domain_type=None): - """Initialize composite transformer with a list of `Transformer` objects - and domain type on which it will be applied.""" try: self.apply = transformers.pop() except IndexError: @@ -209,10 +215,6 @@ class Reverse(Transformer): """Apply the reverse transformation that another one would do.""" def __init__(self, transformer: Transformer): - """Initialize object with an existing `transformer`. - - This will apply `transformer`'s methods in reverse. 
- """ assert not isinstance(transformer, OneHotEncode), "real to categorical is pointless" self.transformer = transformer @@ -239,6 +241,36 @@ def domain_type(self): return self.transformer.target_type +class Precision(Transformer): + """Round real numbers to requested precision.""" + + domain_type = 'real' + target_type = 'real' + + def __init__(self, precision=4): + self.precision = precision + + def transform(self, point): + """Round `point` to the requested precision, as numpy arrays.""" + # numpy.format_float_scientific precision starts at 0 + if isinstance(point, (list, tuple)) or (isinstance(point, numpy.ndarray) and point.shape): + point = map(lambda x: numpy.format_float_scientific(x, precision=self.precision - 1), + point) + point = list(map(float, point)) + else: + point = float(numpy.format_float_scientific(point, precision=self.precision - 1)) + + return numpy.asarray(point) + + def reverse(self, transformed_point): + """Cast `transformed_point` to floats, as numpy arrays.""" + return self.transform(transformed_point) + + def repr_format(self, what): + """Format a string for calling ``__repr__`` in `TransformedDimension`.""" + return "{}({}, {})".format(self.__class__.__name__, self.precision, what) + + class Quantize(Transformer): """Transform real numbers to integers, violating injection.""" @@ -264,7 +296,6 @@ class Enumerate(Transformer): target_type = 'integer' def __init__(self, categories): - """Initialize `Enumerate` transformation with a list of `categories`.""" self.categories = categories map_dict = {cat: i for i, cat in enumerate(categories)} self._map = numpy.vectorize(lambda x: map_dict[x], otypes='i') @@ -296,9 +327,6 @@ class OneHotEncode(Transformer): target_type = 'real' def __init__(self, bound: int): - """Initialize `OneHotEncode` transformer, so that it can construct - a `bound`-dimensional real vector representation of some integer less than `bound`. 
- """ self.num_cats = bound def transform(self, point): @@ -383,12 +411,11 @@ def sample(self, n_samples=1, seed=None): def interval(self, alpha=1.0): """Map the interval bounds to the transformed ones.""" - try: - low, high = self.original_dimension.interval(alpha) - except RuntimeError as exc: - if "Categories" in str(exc): - return (-0.1, 1.1) - raise + if self.original_dimension.prior_name == 'choices': + return self.original_dimension.categories + + low, high = self.original_dimension.interval(alpha) + return self.transform(low), self.transform(high) def __contains__(self, point): @@ -449,6 +476,11 @@ def type(self): type_ = self.transformer.target_type return type_ if type_ != 'invariant' else self.original_dimension.type + @property + def prior_name(self): + """Do not change the prior name of the original dimension.""" + return self.original_dimension.prior_name + @property def shape(self): """Wrap original shape with transformer, because it may have changed.""" @@ -464,6 +496,11 @@ def cast(self, point): """Cast a point according to original_dimension and then transform it""" return self.transform(self.original_dimension.cast(point)) + @property + def cardinality(self): + """Wrap original `Dimension` capacity""" + return self.original_dimension.cardinality + class TransformedSpace(Space): """Wrap the `Space` to support transformation methods.""" diff --git a/src/orion/core/worker/trial.py b/src/orion/core/worker/trial.py index d2463e5e2..3213854f2 100644 --- a/src/orion/core/worker/trial.py +++ b/src/orion/core/worker/trial.py @@ -12,9 +12,19 @@ import hashlib import logging +from orion.core.utils.flatten import unflatten + + log = logging.getLogger(__name__) +def validate_status(status): + """Verify if given status is valid.""" + if status is not None and status not in Trial.allowed_stati: + raise ValueError("Given status `{0}` not one of: {1}".format( + status, Trial.allowed_stati)) + + class Trial: """Represents an entry in database/trials collection. @@ -23,6 +33,9 @@ class Trial: experiment : str Unique identifier for the experiment that produced this trial. Same as an `Experiment._id`. + id_override: str + Trial id returned by the database. It should be unique for a given + set of parameters heartbeat : datetime.datetime Last time trial was identified as being alive. status : str @@ -54,8 +67,8 @@ class Trial: List of evaluated metrics for this particular set of params. One and only one of them is necessarily an *objective* function value. The other are *constraints*, the value of an expression desired to be larger/equal to 0. - params : list of `Trial.Param` - List of suggested values for the `Experiment` parameter space. + params : dict of params + Dict of suggested values for the `Experiment` parameter space. Consists a sample to be evaluated. 
""" @@ -157,21 +170,22 @@ class Param(Value): allowed_types = ('integer', 'real', 'categorical', 'fidelity') __slots__ = ('experiment', '_id', '_status', 'worker', '_working_dir', 'heartbeat', - 'submit_time', 'start_time', 'end_time', '_results', 'params', 'parents') + 'submit_time', 'start_time', 'end_time', '_results', '_params', 'parents', + 'id_override') allowed_stati = ('new', 'reserved', 'suspended', 'completed', 'interrupted', 'broken') def __init__(self, **kwargs): """See attributes of `Trial` for meaning and possible arguments for `kwargs`.""" for attrname in self.__slots__: - if attrname in ('_results', 'params', 'parents'): + if attrname in ('_results', '_params', 'parents'): setattr(self, attrname, list()) else: setattr(self, attrname, None) self.status = 'new' - # Remove useless item - kwargs.pop('_id', None) + # Store the id as an override to support different backends + self.id_override = kwargs.pop('_id', None) for attrname, value in kwargs.items(): if attrname == 'results': @@ -179,9 +193,8 @@ def __init__(self, **kwargs): for item in value: attr.append(self.Result(**item)) elif attrname == 'params': - attr = getattr(self, attrname) for item in value: - attr.append(self.Param(**item)) + self._params.append(self.Param(**item)) else: setattr(self, attrname, value) @@ -198,9 +211,8 @@ def to_dict(self): # Overwrite "results" and "params" with list of dictionaries rather # than list of Value objects - for attrname in ('results', 'params'): - trial_dictionary[attrname] = list(map(lambda x: x.to_dict(), - getattr(self, attrname))) + trial_dictionary['results'] = list(map(lambda x: x.to_dict(), self.results)) + trial_dictionary['params'] = list(map(lambda x: x.to_dict(), self._params)) trial_dictionary['_id'] = trial_dictionary.pop('id') @@ -209,10 +221,15 @@ def to_dict(self): def __str__(self): """Represent partially with a string.""" return "Trial(experiment={0}, status={1}, params={2})".format( - repr(self.experiment), repr(self._status), self.params_repr()) + repr(self.experiment), repr(self._status), self.format_params(self._params)) __repr__ = __str__ + @property + def params(self): + """Parameters of the trial""" + return unflatten({param.name: param.value for param in self._params}) + @property def results(self): """List of results of the trial""" @@ -248,15 +265,15 @@ def status(self): @status.setter def status(self, status): - if status is not None and status not in self.allowed_stati: - raise ValueError("Given status, {0}, not one of: {1}".format( - status, self.allowed_stati)) + validate_status(status) self._status = status @property def id(self): """Return hash_name which is also the database key `_id`.""" - return self.__hash__() + if self.id_override is None: + return self.__hash__() + return self.id_override @property def objective(self): @@ -282,27 +299,21 @@ def gradient(self): """ return self._fetch_one_result_of_type('gradient') - def _repr_values(self, values, sep=','): - """Represent with a string the given values.""" - return sep.join(map(lambda value: "{0.name}:{0.value}".format(value), values)) - - def params_repr(self, sep=','): - """Represent with a string the parameters contained in this `Trial` object.""" - return self._repr_values(self.params, sep) - @property def hash_name(self): """Generate a unique name with an md5sum hash for this `Trial`. .. note:: Two trials that have the same `params` must have the same `hash_name`. 
""" - if not self.params and not self.experiment: - raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' " - "have not been set.") - params_repr = self.params_repr() - experiment_repr = str(self.experiment) - lie_repr = self._repr_values([self.lie]) if self.lie else "" - return hashlib.md5((params_repr + experiment_repr + lie_repr).encode('utf-8')).hexdigest() + return self.compute_trial_hash(self, ignore_fidelity=False) + + @property + def hash_params(self): + """Generate a unique param md5sum hash for this `Trial`. + + .. note:: The params contributing to the hash do not include the fidelity. + """ + return self.compute_trial_hash(self, ignore_fidelity=True, ignore_lie=True) def __hash__(self): """Return the hashname for this trial""" @@ -311,10 +322,10 @@ def __hash__(self): @property def full_name(self): """Generate a unique name using the full definition of parameters.""" - if not self.params or not self.experiment: + if not self._params or not self.experiment: raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' " "have not been set.") - return self.params_repr(sep='-').replace('/', '.') + return self.format_values(self._params, sep='-').replace('/', '.') def _fetch_one_result_of_type(self, result_type, results=None): if results is None: @@ -332,3 +343,45 @@ def _fetch_one_result_of_type(self, result_type, results=None): "Optimizing according to the first one only: %s", value[0]) return value[0] + + def _repr_values(self, values, sep=','): + """Represent with a string the given values.""" + return Trial.format_values(values, sep) + + def params_repr(self, sep=',', ignore_fidelity=False): + """Represent with a string the parameters contained in this `Trial` object.""" + return Trial.format_params(self._params, sep) + + @staticmethod + def format_values(values, sep=','): + """Represent with a string the given values.""" + return sep.join(map(lambda value: "{0.name}:{0.value}".format(value), values)) + + @staticmethod + def format_params(params, sep=',', ignore_fidelity=False): + """Represent with a string the parameters contained in this `Trial` object.""" + if ignore_fidelity: + params = [x for x in params if x.type != 'fidelity'] + else: + params = params + return Trial.format_values(params, sep) + + @staticmethod + def compute_trial_hash(trial, ignore_fidelity=False, ignore_experiment=False, + ignore_lie=False): + """Generate a unique param md5sum hash for a given `Trial`""" + if not trial._params and not trial.experiment: + raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' " + "have not been set.") + + params = Trial.format_params(trial._params, ignore_fidelity=ignore_fidelity) + + experiment_repr = "" + if not ignore_experiment: + experiment_repr = str(trial.experiment) + + lie_repr = "" + if not ignore_lie and trial.lie: + lie_repr = Trial.format_values([trial.lie]) + + return hashlib.md5((params + experiment_repr + lie_repr).encode('utf-8')).hexdigest() diff --git a/src/orion/storage/base.py b/src/orion/storage/base.py index 9f8c270ae..3e45bfeef 100644 --- a/src/orion/storage/base.py +++ b/src/orion/storage/base.py @@ -1,18 +1,24 @@ # -*- coding: utf-8 -*- """ -:mod:`orion.storage.base -- Generic Storage Protocol -==================================================== +:mod:`orion.storage.base` -- Generic Storage Protocol +===================================================== .. 
module:: base :platform: Unix :synopsis: Implement a generic protocol to allow Orion to communicate using - different storage backend + different storage backend """ +import logging + +import orion.core from orion.core.utils import (AbstractSingletonType, SingletonFactory) +log = logging.getLogger(__name__) + + class FailedUpdate(Exception): """Exception raised when we are unable to update a trial's status""" @@ -93,7 +99,13 @@ def register_lie(self, trial): raise NotImplementedError() def reserve_trial(self, experiment): - """Select a pending trial and reserve it for the worker""" + """Select a pending trial and reserve it for the worker + + Returns + ------- + The reserved trial or None if no trials were found + + """ raise NotImplementedError() def fetch_trials(self, experiment=None, uid=None): @@ -178,7 +190,7 @@ def set_trial_status(self, trial, status, heartbeat=None): def fetch_pending_trials(self, experiment): """Fetch all trials that are available to be executed by a worker, - this includes new, suspended and interupted trials + this includes new, suspended and interrupted trials """ raise NotImplementedError() @@ -186,7 +198,7 @@ def fetch_noncompleted_trials(self, experiment): """Fetch all non completed trials""" raise NotImplementedError() - def fetch_trial_by_status(self, experiment, status): + def fetch_trials_by_status(self, experiment, status): """Fetch all trials with the given status""" raise NotImplementedError() @@ -241,10 +253,41 @@ def get_storage(): with the appropriate arguments for the chosen backend """ + return Storage() + + +def setup_storage(storage=None, debug=False): + """Create the storage instance from a configuration. + + Parameters + ---------- + storage: dict, optional + Configuration for the storage backend. If not defined, global configuration + is used. + debug: bool, optional + If in debug mode, the storage config is overridden with legacy:EphemeralDB. + Defaults to False. + + """ + if storage is None: + storage = orion.core.config.storage.to_dict() + + if storage.get('type') == 'legacy' and 'database' not in storage: + storage['database'] = orion.core.config.storage.database.to_dict() + elif storage.get('type') is None and 'database' in storage: + storage['type'] = 'legacy' + + if debug: + storage = {'type': 'legacy', 'database': {'type': 'EphemeralDB'}} + + storage_type = storage.pop('type') + + log.debug("Creating %s storage client with args: %s", storage_type, storage) try: - return Storage() - except TypeError as exception: - raise RuntimeError('Singleton `Storage` was not initialized!') from exception + Storage(of_type=storage_type, **storage) + except ValueError: + if Storage().__class__.__name__.lower() != storage_type.lower(): + raise # pylint: disable=too-few-public-methods @@ -266,7 +309,7 @@ class ReadOnlyStorageProtocol(object): 'fetch_noncompleted_trials', 'fetch_pending_trials', 'fetch_lost_trials', - 'fetch_trial_by_status' + 'fetch_trials_by_status' } def __init__(self, protocol): diff --git a/src/orion/storage/legacy.py b/src/orion/storage/legacy.py index e01f0ca04..09bdae5dd 100644 --- a/src/orion/storage/legacy.py +++ b/src/orion/storage/legacy.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ :mod:`orion.storage.legacy` -- Legacy storage -============================================================================= +============================================= ..
module:: legacy :platform: Unix @@ -9,32 +9,56 @@ """ import datetime +import json import logging import orion.core from orion.core.io.convert import JSONConverter from orion.core.io.database import Database, OutdatedDatabaseError import orion.core.utils.backward as backward -from orion.core.worker.trial import Trial +from orion.core.utils.exceptions import MissingResultFile +from orion.core.worker.trial import Trial, validate_status from orion.storage.base import BaseStorageProtocol, FailedUpdate, MissingArguments log = logging.getLogger(__name__) -def setup_database(config): +def get_database(): + """Return current database + + This is a wrapper around the Database Singleton object to provide + better error message when it is used without being initialized. + + Raises + ------ + RuntimeError + If the underlying database was not initialized prior to calling this function + + Notes + ----- + To initialize the underlying database you must first call `Database(...)` + with the appropriate arguments for the chosen backend + + """ + return Database() + + +def setup_database(config=None): """Create the Database instance from a configuration. Parameters ---------- config: dict - Configuration for the database. + Configuration for the database backend. If not defined, global configuration + is used. """ - db_opts = config['database'] - dbtype = db_opts.pop('type') + if config is None: + # TODO: How could we support orion.core.config.storage.database as well? + config = orion.core.config.database.to_dict() - if config.get("debug"): - dbtype = "EphemeralDB" + db_opts = config + dbtype = db_opts.pop('type') log.debug("Creating %s database client with args: %s", dbtype, db_opts) try: @@ -58,9 +82,9 @@ class Legacy(BaseStorageProtocol): """ - def __init__(self, config=None, setup=True): - if config is not None: - setup_database(config) + def __init__(self, database=None, setup=True): + if database is not None: + setup_database(database) self._db = Database() @@ -167,7 +191,13 @@ def retrieve_result(self, trial, results_file=None, **kwargs): This does not update the database! 
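`setup_storage` and `setup_database` above both resolve partially-specified configurations before instantiating a backend. A standalone sketch of the resolution rules (pure dict manipulation, no singleton side effects; the pickleddb default is an assumption for illustration):

    def resolve_storage_config(storage=None, debug=False):
        """Sketch of the resolution order used by setup_storage above."""
        storage = dict(storage or {'type': 'legacy'})

        # A legacy backend without an explicit database gets the global default.
        if storage.get('type') == 'legacy' and 'database' not in storage:
            storage['database'] = {'type': 'pickleddb'}  # hypothetical default

        # A database given without a type implies the legacy backend.
        elif storage.get('type') is None and 'database' in storage:
            storage['type'] = 'legacy'

        # Debug mode always wins: volatile in-memory storage.
        if debug:
            storage = {'type': 'legacy', 'database': {'type': 'EphemeralDB'}}

        return storage


    assert resolve_storage_config(debug=True)['database']['type'] == 'EphemeralDB'
    assert resolve_storage_config({'database': {'type': 'mongodb'}})['type'] == 'legacy'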
""" - results = JSONConverter().parse(results_file.name) + if results_file is None: + return trial + + try: + results = JSONConverter().parse(results_file.name) + except json.decoder.JSONDecodeError: + raise MissingResultFile() trial.results = [ Trial.Result( @@ -195,7 +225,7 @@ def get_trial(self, trial=None, uid=None): return Trial(**result[0]) - def _update_trial(self, trial: Trial, where=None, **kwargs) -> Trial: + def _update_trial(self, trial, where=None, **kwargs): """See :func:`~orion.storage.BaseStorageProtocol.update_trial`""" if where is None: where = dict() @@ -218,7 +248,12 @@ def fetch_lost_trials(self, experiment): def push_trial_results(self, trial): """See :func:`~orion.storage.BaseStorageProtocol.push_trial_results`""" - return self._update_trial(trial, **trial.to_dict(), where={'_id': trial.id}) + rc = self._update_trial(trial, **trial.to_dict(), + where={'_id': trial.id, 'status': 'reserved'}) + if not rc: + raise FailedUpdate() + + return rc def set_trial_status(self, trial, status, heartbeat=None): """See :func:`~orion.storage.BaseStorageProtocol.set_trial_status`""" @@ -230,18 +265,16 @@ def set_trial_status(self, trial, status, heartbeat=None): heartbeat=heartbeat, experiment=trial.experiment ) - if trial.status == 'new': - update["start_time"] = datetime.datetime.utcnow() - elif status == 'completed': - update["end_time"] = datetime.datetime.utcnow() + validate_status(status) rc = self._update_trial(trial, **update, where={'status': trial.status, '_id': trial.id}) - trial.status = status if not rc: raise FailedUpdate() + trial.status = status + def fetch_pending_trials(self, experiment): """See :func:`~orion.storage.BaseStorageProtocol.fetch_pending_trials`""" query = dict( @@ -300,8 +333,8 @@ def update_heartbeat(self, trial): """Update trial's heartbeat""" return self._update_trial(trial, heartbeat=datetime.datetime.utcnow(), status='reserved') - def fetch_trial_by_status(self, experiment, status): - """See :func:`~orion.storage.BaseStorageProtocol.fetch_trial_by_status`""" + def fetch_trials_by_status(self, experiment, status): + """See :func:`~orion.storage.BaseStorageProtocol.fetch_trials_by_status`""" query = dict( experiment=experiment._id, status=status diff --git a/src/orion/storage/track.py b/src/orion/storage/track.py new file mode 100644 index 000000000..d61e3e98f --- /dev/null +++ b/src/orion/storage/track.py @@ -0,0 +1,737 @@ +# -*- coding: utf-8 -*- +""" +:mod:`orion.storage.track` -- Track Storage Protocol +==================================================== + +.. 
module:: base + :platform: Unix + :synopsis: Implement a storage protocol to allow Orion to use track as a storage method + +""" + +from collections import defaultdict +import copy +import datetime +import hashlib +import logging +import sys +import warnings + +from orion.core.io.database import DuplicateKeyError +from orion.core.utils.flatten import flatten, unflatten +from orion.core.worker.trial import Trial as OrionTrial, validate_status +from orion.storage.base import BaseStorageProtocol, FailedUpdate, MissingArguments + +log = logging.getLogger(__name__) + + +# TODO: Remove this when factory is reworked +class Track: # noqa: F811 + """Forward declaration because of a weird factory bug where Track is not found""" + + def __init__(self, uri): + assert False, 'This should not be called' + + +HAS_TRACK = False +REASON = None +try: + from track.client import TrackClient + from track.persistence.utils import parse_uri + from track.serialization import to_json + from track.structure import CustomStatus, Status as TrackStatus + from track.structure import Project, Trial as TrackTrial, TrialGroup + from track.persistence.local import ConcurrentWrite + from track.utils import ItemNotFound + + HAS_TRACK = True +except ImportError: + REASON = 'Track is not installed' + +except SyntaxError: + major, minor, patch, _, _ = sys.version_info + + if minor < 6: + REASON = 'Python is too old' + log.warning('Track does not support python < 3.6!') + else: + raise + + +if HAS_TRACK: + _status = [ + CustomStatus('new', TrackStatus.CreatedGroup.value + 1), + CustomStatus('reserved', TrackStatus.CreatedGroup.value + 2), + ] + + _status_dict = { + s.name: s for s in _status + } + _status_dict['completed'] = TrackStatus.Completed + _status_dict['interrupted'] = TrackStatus.Interrupted + _status_dict['broken'] = TrackStatus.Broken + _status_dict['suspended'] = TrackStatus.Suspended + + +def get_track_status(val): + """Convert orion status to track status""" + return _status_dict.get(val) + + +def convert_track_status(status): + """Convert track status to orion status""" + return status.name.lower() + + +def remove_leading_slash(name): + """Remove leading slash""" + # if name[0] == '/': + # return name[1:] + # return name + return name + + +def add_leading_slash(name): + """Add leading slash""" + # if name[0] == '/': + # return name + # return '/' + name + return name + + +def to_epoch(date): + """Convert datetime class into seconds since epochs""" + return (date - datetime.datetime(1970, 1, 1)).total_seconds() + + +class TrialAdapter: + """Mock Trial, see `~orion.core.worker.trial.Trial` + + Parameters + ---------- + storage_trial + Track trial object + + orion_trial + Orion trial object + + objective: str + objective key + + """ + + def __init__(self, storage_trial, orion_trial=None, objective=None): + self.storage = copy.deepcopy(storage_trial) + self.memory = orion_trial + self.session_group = None + self.objective_key = objective + self.objectives_values = None + self._results = [] + + def _repr_values(self, values, sep=','): + """Represent with a string the given values.""" + return + + def __str__(self): + """Represent partially with a string.""" + param_rep = ','.join(map(lambda value: "{0.name}:{0.value}".format(value), self._params)) + ret = "TrialAdapter(uid={3}, experiment={0}, status={1}, params={2})".format( + repr(self.experiment[:10]), repr(self.status), param_rep, self.storage.uid) + return ret + + __repr__ = __str__ + + @property + def experiment(self): + """See 
`~orion.core.worker.trial.Trial`""" + if self.memory is not None: + return self.memory.experiment + return self.storage.group_id + + @property + def id(self): + """See `~orion.core.worker.trial.Trial`""" + return self.storage.uid + + @property + def params(self): + """See `~orion.core.worker.trial.Trial`""" + if self.memory is not None: + return self.memory.params + + return unflatten({param.name: param.value for param in self._params}) + + @property + def _params(self): + """See `~orion.core.worker.trial.Trial`""" + if self.memory is not None: + return self.memory._params + + types = self.storage.metadata['params_types'] + params = flatten(self.storage.parameters) + + return [ + OrionTrial.Param(name=add_leading_slash(name), value=params.get(name), type=vtype) + for name, vtype in types.items() + ] + + @property + def status(self): + """See `~orion.core.worker.trial.Trial`""" + if self.memory is not None: + return self.memory.status + + return convert_track_status(self.storage.status) + + @status.setter + def status(self, value): + """See `~orion.core.worker.trial.Trial`""" + self.storage.status = get_track_status(value) + + if self.memory is not None: + self.memory.status = value + + def to_dict(self): + """See `~orion.core.worker.trial.Trial`""" + trial = copy.deepcopy(self.storage.metadata) + trial.update({ + 'results': [r.to_dict() for r in self.results], + 'params': [p.to_dict() for p in self._params], + '_id': self.storage.uid, + 'submit_time': self.submit_time, + 'experiment': self.experiment, + 'status': self.status + }) + + trial.pop('_update_count', 0) + trial.pop('metric_types', 0) + trial.pop('params_types') + + return trial + + @property + def lie(self): + """See `~orion.core.worker.trial.Trial`""" + # we do not lie like Orion does + return None + + @property + def objective(self): + """See `~orion.core.worker.trial.Trial`""" + def result(val): + return OrionTrial.Result(name=self.objective_key, value=val, type='objective') + + if self.objective_key is None: + raise RuntimeError('no objective key was defined!') + + self.objectives_values = [] + + data = self.storage.metrics.get(self.objective_key) + if data is None: + return None + + # objective was pushed without step data (already sorted) + if isinstance(data, list): + self.objectives_values = data + return result(self.objectives_values[-1]) + + # objective was pushed with step data + elif isinstance(data, dict): + for k, v in self.storage.metrics[self.objective_key].items(): + self.objectives_values.append((int(k), v)) + + self.objectives_values.sort(key=lambda x: x[0]) + return result(self.objectives_values[-1][1]) + + return None + + @property + def results(self): + """See `~orion.core.worker.trial.Trial`""" + self._results = [] + + for k, values in self.storage.metrics.items(): + result_type = 'statistic' + if k == self.objective_key: + result_type = 'objective' + + if isinstance(values, dict): + items = list(values.items()) + items.sort(key=lambda v: v[0]) + + val = items[-1][1] + self._results.append(OrionTrial.Result(name=k, type=result_type, value=val)) + elif isinstance(values, list): + self._results.append(OrionTrial.Result(name=k, type=result_type, value=values[-1])) + + return self._results + + @property + def hash_params(self): + """See `~orion.core.worker.trial.Trial`""" + return OrionTrial.compute_trial_hash(self, ignore_fidelity=True) + +
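The `objective` and `results` accessors above cope with Track metrics stored either as plain lists (no step information) or as step-keyed dicts. The "take the latest value" logic, reduced to a standalone helper:

    def latest_value(metric):
        """Return the most recent value of a list- or dict-shaped metric."""
        if isinstance(metric, list):
            # Pushed without step data: already ordered, take the last entry.
            return metric[-1]
        if isinstance(metric, dict):
            # Pushed with step data: sort numerically by step, take the last.
            steps = sorted((int(step), value) for step, value in metric.items())
            return steps[-1][1]
        raise TypeError('unsupported metric layout')


    assert latest_value([0.9, 0.5, 0.3]) == 0.3
    assert latest_value({'10': 0.3, '2': 0.9}) == 0.3  # step 10 beats step 2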
@results.setter + def results(self, value): + """See `~orion.core.worker.trial.Trial`""" + self._results = value + + @property + def gradient(self): + """See `~orion.core.worker.trial.Trial`""" + return None + + @property + def submit_time(self): + """See `~orion.core.worker.trial.Trial`""" + return datetime.datetime.utcfromtimestamp(self.storage.metadata.get('submit_time')) + + @property + def end_time(self): + """See `~orion.core.worker.trial.Trial`""" + return datetime.datetime.utcfromtimestamp(self.storage.metadata.get('end_time')) + + @end_time.setter + def end_time(self, value): + """See `~orion.core.worker.trial.Trial`""" + self.storage.metadata['end_time'] = value + + @property + def heartbeat(self): + """Trial Heartbeat""" + heartbeat = self.storage.metadata.get('heartbeat') + if heartbeat: + return datetime.datetime.utcfromtimestamp(heartbeat) + return None + + @property + def parents(self): + """See `~orion.core.worker.trial.Trial`""" + return self.storage.metadata.get('parent', []) + + @parents.setter + def parents(self, other): + """See `~orion.core.worker.trial.Trial`""" + self.storage.metadata['parent'] = other + + +def experiment_uid(exp=None, name=None, version=None): + """Return an experiment uid from its name and version for Track""" + if name is None: + name = exp.name + + if version is None: + version = exp.version + + sha = hashlib.sha256() + sha.update(name.encode('utf8')) + sha.update(bytes([version])) + return sha.hexdigest() + + +class Track(BaseStorageProtocol): # noqa: F811 + """Implement a generic protocol to allow Orion to communicate using + different storage backend + + Parameters + ---------- + uri: str + Track backend to use for storage; the format is as follow + `protocol://[username:password@]host1[:port1][,...hostN[:portN]]][/[database][?options]]` + + """ + + def __init__(self, uri): + if not HAS_TRACK: + # We ignored the import error above in case we did not need track + # but now that we do we can rethrow it + raise ImportError('Track is not installed!') + + self.uri = uri + self.options = parse_uri(uri)['query'] + + self.client = TrackClient(uri) + self.backend = self.client.protocol + self.project = None + self.group = None + self.objective = self.options.get('objective') + self.lies = dict() + assert self.objective is not None, 'An objective should be defined!' 
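`experiment_uid` above derives a deterministic Track group id from the experiment name and version, so registering the same experiment again maps to the same group. Reproduced standalone:

    import hashlib


    def experiment_uid(name, version):
        """Same recipe as above: sha256 over the name plus the version byte."""
        sha = hashlib.sha256()
        sha.update(name.encode('utf8'))
        sha.update(bytes([version]))
        return sha.hexdigest()


    # Deterministic per (name, version); bumping the version opens a new group.
    assert experiment_uid('demo', 1) == experiment_uid('demo', 1)
    assert experiment_uid('demo', 1) != experiment_uid('demo', 2)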
+ + def _get_project(self, name): + if self.project is None: + self.project = self.backend.get_project(Project(name=name)) + + if self.project is None: + self.project = self.backend.new_project(Project(name=name)) + + assert self.project, 'Project should have been found' + + def create_experiment(self, config): + """Insert a new experiment inside the database""" + self._get_project(config['name']) + + self.group = self.backend.new_trial_group( + TrialGroup( + name=experiment_uid(name=config['name'], version=config['version']), + project_id=self.project.uid, + metadata=to_json(config) + ) + ) + + if self.group is None: + raise DuplicateKeyError('Experiment was already created') + + config['_id'] = self.group.uid + return config + + def update_experiment(self, experiment=None, uid=None, where=None, **kwargs): + """See :func:`~orion.storage.BaseStorageProtocol.update_experiment`""" + if uid and experiment: + assert experiment._id == uid + + if uid is None: + if experiment is None: + raise MissingArguments('experiment or uid need to be defined') + else: + uid = experiment._id + + self.group = self.backend.fetch_and_update_group({ + '_uid': uid + }, 'set_group_metadata', **kwargs) + + return self.group + + def fetch_experiments(self, query, selection=None): + """Fetch all experiments that match the query""" + new_query = {} + for k, v in query.items(): + if k == 'name': + new_query['metadata.name'] = v + + elif k.startswith('metadata'): + new_query['metadata.{}'.format(k)] = v + + elif k == '_id': + new_query['_uid'] = v + + else: + new_query[k] = v + + groups = self.backend.fetch_groups(new_query) + + experiments = [] + for group in groups: + version = group.metadata.get('version', 0) + + # metadata is experiment config + exp = group.metadata + exp.update({ + '_id': group.uid, + 'version': version, + 'name': group.project_id, + }) + + experiments.append(exp) + + return experiments + + def register_trial(self, trial): + """Create a new trial to be executed""" + stamp = datetime.datetime.utcnow() + trial.submit_time = stamp + + metadata = dict() + # pylint: disable=protected-access + metadata['params_types'] = {remove_leading_slash(p.name): p.type for p in trial._params} + metadata['submit_time'] = to_json(trial.submit_time) + metadata['end_time'] = to_json(trial.end_time) + metadata['worker'] = trial.worker + metadata['metric_types'] = {remove_leading_slash(p.name): p.type for p in trial.results} + metadata['metric_types'][self.objective] = 'objective' + heartbeat = to_json(trial.heartbeat) + if heartbeat is None: + heartbeat = 0 + metadata['heartbeat'] = heartbeat + + metrics = defaultdict(list) + for p in trial.results: + metrics[p.name] = [p.value] + + if self.project is None: + self._get_project(self.group.project_id) + + trial = self.backend.new_trial(TrackTrial( + _hash=trial.hash_name, + status=get_track_status(trial.status), + project_id=self.project.uid, + group_id=self.group.uid, + parameters=trial.params, + metadata=metadata, + metrics=metrics + ), auto_increment=False) + + if trial is None: + raise DuplicateKeyError('Was not able to register Trial!') + + return TrialAdapter(trial, objective=self.objective) + + def register_lie(self, trial): + """Register a *fake* trial created by the strategist. + + The main difference between fake trial and original ones is the addition of a fake objective + result, and status being set to completed. 
The id of the fake trial is different than the id + of the original trial, but the original id can be computed using the hashcode on parameters + of the fake trial. See mod:`orion.core.worker.strategy` for more information and the + Strategist object and generation of fake trials. + + Parameters + ---------- + trial: `Trial` object + Fake trial to register in the database + + """ + warnings.warn('Track does not persist lies!') + + if trial.id in self.lies: + raise DuplicateKeyError('Lie already exists') + + self.lies[trial.id] = trial + return trial + + def _fetch_trials(self, query, *args, **kwargs): + """Fetch all the trials that match the query""" + def sort_key(item): + submit_time = item.submit_time + if submit_time is None: + return 0 + return submit_time + + query = to_json(query) + + new_query = {} + for k, v in query.items(): + if k == 'experiment': + new_query['group_id'] = v + + elif k == 'heartbeat': + new_query['metadata.heartbeat'] = v + + elif k == '_id': + new_query['uid'] = v + + elif k == 'end_time': + new_query['metadata.end_time'] = v + + elif k == 'status' and isinstance(v, str): + new_query['status'] = get_track_status(v) + + else: + new_query[k] = v + + trials = [ + TrialAdapter(t, objective=self.objective) for t in self.backend.fetch_trials(new_query) + ] + trials.sort(key=sort_key) + return trials + + _ignore_updates_for = {'results', 'params', '_id'} + + def _update_trial(self, trial, **kwargs): + """Update the fields of a given trials + + Parameters + ---------- + trial: Trial + Trial object to update + + where: Optional[dict] + constraint trial must respect + + kwargs: dict + a dictionary of fields to update + + Returns + ------- + returns true if the underlying storage was updated + + """ + try: + if isinstance(trial, TrialAdapter): + trial = trial.storage + + for key, value in kwargs.items(): + if key == 'status': + self.backend.set_trial_status(trial, get_track_status(value)) + elif key in self._ignore_updates_for: + continue + else: + pair = {key: to_json(value)} + self.backend.log_trial_metadata(trial, **pair) + + return True + except ConcurrentWrite: + return False + + def retrieve_result(self, trial, *args, **kwargs): + """Fetch the result from a given medium (file, db, socket, etc..) for a given trial and + insert it into the trial object + """ + if isinstance(trial, TrialAdapter): + trial = trial.storage + + refreshed_trial = self.backend.get_trial(trial)[0] + new_trial = TrialAdapter(refreshed_trial, objective=self.objective) + + assert new_trial.objective is not None, 'Trial should have returned an objective value!' 
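`_fetch_trials` above translates Oríon's trial-query keys into the Track schema before querying the backend. The key mapping, condensed into a standalone helper (status conversion through `get_track_status` omitted for brevity):

    def translate_query(query):
        """Map Oríon trial-query keys onto the Track field names used above."""
        mapping = {
            'experiment': 'group_id',
            'heartbeat': 'metadata.heartbeat',
            '_id': 'uid',
            'end_time': 'metadata.end_time',
        }
        return {mapping.get(key, key): value for key, value in query.items()}


    assert translate_query({'experiment': 'abc', 'status': 'new'}) == \
        {'group_id': 'abc', 'status': 'new'}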
+ + log.info("trial objective is (%s: %s)", self.objective, new_trial.objective.value) + return new_trial + + def fetch_pending_trials(self, experiment): + """See :func:`~orion.storage.BaseStorageProtocol.fetch_pending_trials`""" + pending_status = ['new', 'suspended', 'interrupted'] + pending_status = [get_track_status(s) for s in pending_status] + + query = dict( + group_id=experiment.id, + status={'$in': pending_status} + ) + + return self._fetch_trials(query) + + def set_trial_status(self, trial, status, heartbeat=None): + """Update the trial status and the heartbeat + + Raises + ------ + FailedUpdate + The exception is raised if the status of the trial object + does not match the status in the database + + """ + validate_status(status) + try: + result_trial = self.backend.fetch_and_update_trial({ + 'uid': trial.id, + 'status': get_track_status(trial.status) + }, 'set_trial_status', status=get_track_status(status)) + + except ItemNotFound as e: + raise FailedUpdate() from e + + trial.status = status + return result_trial + + def fetch_trials(self, experiment=None, uid=None): + """See :func:`~orion.storage.BaseStorageProtocol.fetch_trials`""" + if uid and experiment: + assert experiment.id == uid + + if uid is None: + if experiment is None: + raise MissingArguments('experiment or uid need to be defined') + + uid = experiment.id + + return self._fetch_trials(dict(group_id=uid)) + + def get_trial(self, trial=None, uid=None): + """See :func:`~orion.storage.BaseStorageProtocol.get_trials`""" + if trial is not None and uid is not None: + assert trial.id == uid + + if uid is None: + if trial is None: + raise MissingArguments('trial or uid argument should be populated') + + uid = trial.id + + _hash, _rev = 0, 0 + data = uid.split('_', maxsplit=1) + + if len(data) == 1: + _hash = data[0] + + elif len(data) == 2: + _hash, _rev = data + + trials = self.backend.get_trial(TrackTrial(_hash=_hash, revision=_rev)) + + if trials is None: + return None + + assert len(trials) == 1 + return TrialAdapter(trials[0], objective=self.objective) + + def reserve_trial(self, experiment): + """Select a pending trial and reserve it for the worker""" + query = dict( + group_id=experiment.id, + status={'$in': ['new', 'suspended', 'interrupted']} + ) + + try: + trial = self.backend.fetch_and_update_trial( + query, + 'set_trial_status', + status=get_track_status('reserved')) + + except ItemNotFound: + return None + + if trial is None: + return None + + return TrialAdapter(trial, objective=self.objective) + + def fetch_lost_trials(self, experiment): + """Fetch all trials that have a heartbeat older than + some given time delta (2 minutes by default) + """ + # TODO: Configure this + threshold = to_epoch(datetime.datetime.utcnow() - datetime.timedelta(seconds=60 * 2)) + lte_comparison = {'$lte': threshold} + query = { + 'experiment': experiment.id, + 'status': 'reserved', + 'heartbeat': lte_comparison + } + + return self._fetch_trials(query) + + def push_trial_results(self, trial): + """Push the trial's results to the database""" + # Track already pushed the info no need to do it here + pass + + def fetch_noncompleted_trials(self, experiment): + """Fetch all non completed trials""" + query = dict( + group_id=experiment.id, + status={'$ne': get_track_status('completed')} + ) + return self.backend.fetch_trials(query) + + def fetch_trials_by_status(self, experiment, status): + """Fetch all trials with the given status""" + trials = self._fetch_trials(dict(status=status, group_id=experiment.id)) + return trials + + def 
count_completed_trials(self, experiment): + """Count the number of completed trials""" + return len(self._fetch_trials(dict(status='completed', group_id=experiment.id))) + + def count_broken_trials(self, experiment): + """Count the number of broken trials""" + return len(self._fetch_trials(dict(status='broken', group_id=experiment.id))) + + def update_heartbeat(self, trial): + """Update trial's heartbeat""" + self.backend.log_trial_metadata(trial.storage, + heartbeat=to_epoch(datetime.datetime.utcnow())) diff --git a/tests/conftest.py b/tests/conftest.py index 047fb77ca..cab11c5e6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """Common fixtures and utils for unittests and functional tests.""" import os +import tempfile import numpy from pymongo import MongoClient @@ -15,11 +16,27 @@ from orion.core.io.database.mongodb import MongoDB from orion.core.io.database.pickleddb import PickledDB import orion.core.utils.backward as backward +from orion.core.utils.tests import update_singletons from orion.core.worker.trial import Trial from orion.storage.base import Storage from orion.storage.legacy import Legacy +@pytest.fixture(scope="session", autouse=True) +def shield_from_user_config(request): + """Do not read user's yaml global config.""" + _pop_out_yaml_from_config(orion.core.config) + + +def _pop_out_yaml_from_config(config): + """Remove any configuration fetch from yaml file""" + for key in config._config.keys(): + config._config[key].pop('yaml', None) + + for key in config._subconfigs.keys(): + _pop_out_yaml_from_config(config._subconfigs[key]) + + class DumbAlgo(BaseAlgorithm): """Stab class for `BaseAlgorithm`.""" @@ -32,6 +49,7 @@ def __init__(self, space, value=5, self._num = 0 self._index = 0 self._points = [] + self._suggested = None self._results = [] self._score_point = None self._judge_point = None @@ -55,14 +73,17 @@ def seed(self, seed): @property def state_dict(self): """Return a state dict that can be used to reset the state of the algorithm.""" - return {'index': self._index, 'suggested': self._suggested, 'num': self._num, - 'done': self.done} + _state_dict = super(DumbAlgo, self).state_dict + _state_dict.update({'index': self._index, 'suggested': self._suggested, 'num': self._num, + 'done': self.done}) + return _state_dict def set_state(self, state_dict): """Reset the state of the algorithm based on the given state_dict :param state_dict: Dictionary representing state of an algorithm """ + super(DumbAlgo, self).set_state(state_dict) self._index = state_dict['index'] self._suggested = state_dict['suggested'] self._num = state_dict['num'] @@ -84,6 +105,7 @@ def suggest(self, num=1): def observe(self, points, results): """Log inputs.""" + super(DumbAlgo, self).observe(points, results) self._points += points self._results += results @@ -170,7 +192,7 @@ def exp_config(): for config in exp_config[0]: config["metadata"]["user_script"] = os.path.join( os.path.dirname(__file__), config["metadata"]["user_script"]) - backward.populate_priors(config['metadata']) + backward.populate_space(config) config['version'] = 1 return exp_config @@ -223,8 +245,8 @@ def version_XYZ(monkeypatch): """Force orion version XYZ on output of resolve_config.fetch_metadata""" non_patched_fetch_metadata = resolve_config.fetch_metadata - def fetch_metadata(cmdargs): - metadata = non_patched_fetch_metadata(cmdargs) + def fetch_metadata(user=None, user_args=None): + metadata = non_patched_fetch_metadata(user, user_args) metadata['orion_version'] = 'XYZ' return 
@@ -234,15 +256,13 @@ def fetch_metadata(cmdargs): def create_db_instance(null_db_instances, clean_db): """Create and save a singleton database instance.""" try: - config = { - 'database': { - 'type': 'MongoDB', - 'name': 'orion_test', - 'username': 'user', - 'password': 'pass' - } + database = { + 'type': 'MongoDB', + 'name': 'orion_test', + 'username': 'user', + 'password': 'pass' } - db = Storage(of_type='legacy', config=config) + db = Storage(of_type='legacy', database=database) except ValueError: db = Storage() @@ -269,3 +289,17 @@ def fixed_dictionary(user_script): vcs['diff_sha'] = "diff" return vcs monkeypatch.setattr(resolve_config, "infer_versioning_metadata", fixed_dictionary) + + +@pytest.fixture(scope="function") +def setup_pickleddb_database(): + """Configure the database""" + update_singletons() + temporary_file = tempfile.NamedTemporaryFile() + + os.environ['ORION_DB_TYPE'] = "pickleddb" + os.environ['ORION_DB_ADDRESS'] = temporary_file.name + yield + temporary_file.close() + del os.environ['ORION_DB_TYPE'] + del os.environ['ORION_DB_ADDRESS'] diff --git a/tests/functional/algos/hyperband.yaml b/tests/functional/algos/hyperband.yaml new file mode 100644 index 000000000..7ef02cf2e --- /dev/null +++ b/tests/functional/algos/hyperband.yaml @@ -0,0 +1,13 @@ +name: demo_algo + +max_trials: 100 + +algorithms: + hyperband: + repetitions: 5 + seed: 1 + +database: + type: 'mongodb' + name: 'orion_test' + host: 'mongodb://user:pass@localhost' \ No newline at end of file diff --git a/tests/functional/algos/test_algos.py b/tests/functional/algos/test_algos.py index 4375bd776..8cbdaaf6d 100644 --- a/tests/functional/algos/test_algos.py +++ b/tests/functional/algos/test_algos.py @@ -10,8 +10,8 @@ from orion.storage.base import get_storage -config_files = ['random_config.yaml'] -fidelity_config_files = ['random_config.yaml', 'asha_config.yaml'] +config_files = ['random_config.yaml', 'tpe.yaml'] +fidelity_config_files = ['random_config.yaml', 'asha_config.yaml', 'hyperband.yaml'] fidelity_only_config_files = list(set(fidelity_config_files) - set(config_files)) @@ -54,8 +54,7 @@ def test_simple(monkeypatch, config_file): assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['-x~uniform(-50, 50)'] + assert exp['metadata']['user_args'] == ['./black_box.py', '-x~uniform(-50, 50)'] trials = storage.fetch_trials(uid=exp_id) assert len(trials) <= config['max_trials'] @@ -65,11 +64,36 @@ assert best_trial.objective.name == 'example_objective' assert abs(best_trial.objective.value - 23.4) < 1e-5 assert len(best_trial.params) == 1 - param = best_trial.params[0] + param = best_trial._params[0] assert param.name == '/x' assert param.type == 'real' + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +@pytest.mark.parametrize('config_file', config_files) +def test_random_stop(monkeypatch, config_file): + """Test that random search stops when it exhausts the discrete search space.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + orion.core.cli.main(["hunt", "--config", config_file, + "./black_box.py", "-x~uniform(-10, 5, discrete=True)"]) + + with open(config_file, 'rb') as f: + config = yaml.safe_load(f) + + storage = get_storage() + exp = list(storage.fetch_experiments({'name': config['name']})) + assert len(exp) 
== 1 + exp = exp[0] + assert '_id' in exp + exp_id = exp['_id'] + + trials = storage.fetch_trials(uid=exp_id) + assert len(trials) <= config['max_trials'] + assert len(trials) == 15 + assert trials[-1].status == 'completed' + + @pytest.mark.usefixtures("clean_db") @pytest.mark.usefixtures("null_db_instances") @pytest.mark.parametrize('config_file', fidelity_config_files) @@ -77,7 +101,8 @@ def test_with_fidelity(database, monkeypatch, config_file): """Test a scenario with fidelity.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) orion.core.cli.main(["hunt", "--config", config_file, - "./black_box.py", "-x~uniform(-50, 50)", "--fidelity~fidelity(1,10,4)"]) + "./black_box.py", "-x~uniform(-50, 50, precision=None)", + "--fidelity~fidelity(1,10,4)"]) with open(config_file, 'rb') as f: config = yaml.safe_load(f) @@ -96,8 +121,8 @@ def test_with_fidelity(database, monkeypatch, config_file): assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['-x~uniform(-50, 50)', "--fidelity~fidelity(1,10,4)"] + assert exp['metadata']['user_args'] == ['./black_box.py', '-x~uniform(-50, 50, precision=None)', + '--fidelity~fidelity(1,10,4)'] trials = storage.fetch_trials(uid=exp_id) assert len(trials) <= config['max_trials'] @@ -107,10 +132,10 @@ def test_with_fidelity(database, monkeypatch, config_file): assert best_trial.objective.name == 'example_objective' assert abs(best_trial.objective.value - 23.4) < 1e-5 assert len(best_trial.params) == 2 - fidelity = best_trial.params[0] + fidelity = best_trial._params[0] assert fidelity.name == '/fidelity' assert fidelity.type == 'fidelity' assert fidelity.value == 10 - param = best_trial.params[1] + param = best_trial._params[1] assert param.name == '/x' assert param.type == 'real' diff --git a/tests/functional/algos/tpe.yaml b/tests/functional/algos/tpe.yaml new file mode 100644 index 000000000..b193a606d --- /dev/null +++ b/tests/functional/algos/tpe.yaml @@ -0,0 +1,18 @@ +name: demo_algo + +max_trials: 100 + +algorithms: + tpe: + seed: 1 + n_initial_points: 20 + n_ei_candidates: 24 + gamma: 0.25 + equal_weight: False + prior_weight: 1.0 + full_weight_num: 25 + +database: + type: 'mongodb' + name: 'orion_test' + host: 'mongodb://user:pass@localhost' \ No newline at end of file diff --git a/tests/functional/backward_compatibility/test_versions.py b/tests/functional/backward_compatibility/test_versions.py index edd76678c..64a89833a 100644 --- a/tests/functional/backward_compatibility/test_versions.py +++ b/tests/functional/backward_compatibility/test_versions.py @@ -8,19 +8,24 @@ from pymongo import MongoClient import pytest +from orion.client import create_experiment from orion.core.io.database import Database, OutdatedDatabaseError from orion.core.io.database.mongodb import MongoDB from orion.core.io.database.pickleddb import PickledDB -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder from orion.storage.base import get_storage, Storage from orion.storage.legacy import Legacy DIRNAME = os.path.dirname(os.path.abspath(__file__)) +PYTHON_SCRIPT_PATH = os.path.join(DIRNAME, 'python_api.py') SCRIPT_PATH = os.path.join(DIRNAME, 'black_box.py') CONFIG_FILE = os.path.join(DIRNAME, 'random.yaml') +# Ignore pre-0.1.3 because there was no PickleDB backend. 
+VERSIONS = ['0.1.3', '0.1.4', '0.1.5', '0.1.6', '0.1.7'] + def get_package(version): """Get package name based on version. @@ -33,8 +38,12 @@ return 'orion.core' -# Ignore pre-0.1.3 because there was no PickleDB backend. -VERSIONS = ['0.1.3', '0.1.4', '0.1.5', '0.1.6'] +def get_branch_argument(version): + """Get argument to branch. + + Before v0.1.8 it was --branch; from v0.1.8 onward it is --branch-to. + """ + return '--branch' if version < '0.1.8' else '--branch-to' def clean_mongodb(): @@ -99,6 +108,35 @@ def get_virtualenv_dir(version): return 'version-{}'.format(version) +def fill_from_cmdline_api(orion_script, version): + """Add experiments and trials using the commandline API""" + print(execute(' '.join([ + orion_script, '-vv', 'init_only', '--name', 'init-cmdline', + '--config', CONFIG_FILE, + SCRIPT_PATH, '-x~uniform(-50,50)']))) + + print(execute(' '.join([ + orion_script, '-vv', 'init_only', '--name', 'init-cmdline', + get_branch_argument(version), 'init-cmdline-branch-old', + '--config', CONFIG_FILE]))) + + print(execute(' '.join([ + orion_script, '-vv', 'hunt', '--name', 'hunt-cmdline', + '--config', CONFIG_FILE, + SCRIPT_PATH, '-x~uniform(-50,50)']))) + + print(execute(' '.join([ + orion_script, '-vv', 'hunt', '--name', 'hunt-cmdline', + get_branch_argument(version), 'hunt-cmdline-branch-old', + '--config', CONFIG_FILE]))) + + +def fill_from_python_api(python_script, version): + """Add experiments and trials using the python API""" + print(execute(' '.join([ + python_script, PYTHON_SCRIPT_PATH, version]))) + + @pytest.fixture(scope='class', params=VERSIONS) def fill_db(request): """Add experiments and trials in DB for given version of Oríon.""" @@ -109,29 +147,14 @@ setup_virtualenv(version) orion_script = os.path.join(get_virtualenv_dir(version), 'bin', 'orion') + python_script = os.path.join(get_virtualenv_dir(version), 'bin', 'python') orion_version = get_version(orion_script) assert orion_version == 'orion {}'.format(version) - print(execute(' '.join([ - orion_script, '-vv', 'init_only', '--name', 'init', - '--config', CONFIG_FILE, - SCRIPT_PATH, '-x~uniform(-50,50)']))) - - print(execute(' '.join([ - orion_script, '-vv', 'init_only', '--name', 'init', - '--branch', 'init-branch-old', - '--config', CONFIG_FILE]))) - - print(execute(' '.join([ - orion_script, '-vv', 'hunt', '--name', 'hunt', - '--config', CONFIG_FILE, - SCRIPT_PATH, '-x~uniform(-50,50)']))) - - print(execute(' '.join([ - orion_script, '-vv', 'hunt', '--name', 'hunt', - '--branch', 'hunt-branch-old', - '--config', CONFIG_FILE]))) + fill_from_cmdline_api(orion_script, version) + if version > '0.1.7': + fill_from_python_api(python_script, version) orion_version = get_version('orion') assert orion_version != 'orion {}'.format(version) @@ -143,6 +166,8 @@ def fill_db(request): print(execute('orion -vv db upgrade -f')) + return version + def null_db_instances(): """Nullify singleton instance so that we can assure independent instantiation tests.""" @@ -156,9 +181,7 @@ def build_storage(): """Build storage from scratch""" null_db_instances() - experiment_builder = ExperimentBuilder() - local_config = experiment_builder.fetch_full_config({}, use_db=False) - experiment_builder.setup_storage(local_config) + experiment_builder.setup_storage() return get_storage() @@ -184,37 +207,71 @@ def test_db_test(self): out = execute('orion db test') assert 'Failure' not in out - def test_list(self): + def test_list(self, fill_db): """Verify list 
command""" out = execute('orion list') - assert 'init-v1' in out - assert 'init-branch-old-v1' in out - assert 'hunt-v1' in out - assert 'hunt-branch-old-v1' in out + assert 'init-cmdline-v1' in out + assert 'init-cmdline-branch-old-v1' in out + assert 'hunt-cmdline-v1' in out + assert 'hunt-cmdline-branch-old-v1' in out + + version = fill_db + if version > '0.1.7': + assert 'hunt-python-v1' in out - def test_status(self): + def test_status(self, fill_db): """Verify status command""" out = execute('orion status') - assert 'init-v1' in out - assert 'init-branch-old-v1' in out - assert 'hunt-v1' in out - assert 'hunt-branch-old-v1' in out + assert 'init-cmdline-v1' in out + assert 'init-cmdline-branch-old-v1' in out + assert 'hunt-cmdline-v1' in out + assert 'hunt-cmdline-branch-old-v1' in out + + version = fill_db + if version > '0.1.7': + assert 'hunt-python-v1' in out + + def test_info_cmdline_api(self): + """Verify info command from commandline api""" + out = execute('orion info --name hunt-cmdline') + assert 'name: hunt-cmdline' in out + + def test_info_python_api(self, fill_db): + """Verify info command from python api""" + version = fill_db + if version < '0.1.8': + pytest.skip("Python API not supported by {}".format(version)) - def test_info(self): - """Verify info command""" - out = execute('orion info --name hunt') - assert 'name: hunt' in out + out = execute('orion info --name hunt-python') + assert 'name: hunt-python' in out def test_init_only(self): """Verify init_only command""" print(execute(' '.join([ - 'orion', 'init_only', '--name', 'init', - '--branch', 'init-branch']))) + 'orion', 'init_only', '--name', 'init-cmdline', + '--branch-to', 'init-cmdline-branch']))) - def test_hunt(self): - """Verify hunt command""" + def test_hunt_cmdline_api(self): + """Verify hunt command from cmdline api parent""" print(execute(' '.join([ - 'orion', 'hunt', '--name', 'hunt', - '--branch', 'hunt-branch']))) + 'orion', 'hunt', '--name', 'hunt-cmdline', + '--branch-to', 'hunt-cmdline-branch']))) + + def test_hunt_python_api(self, fill_db): + """Verify hunt command from python api parent""" + version = fill_db + if version < '0.1.8': + pytest.skip("Python API not supported by {}".format(version)) + + def function(x): + """Evaluate partial information of a quadratic.""" + z = x - 34.56789 + return [dict( + name='example_objective', + type='objective', + value=4 * z**2 + 23.4)] + + exp = create_experiment('hunt-python', branching={'branch-to': 'hunt-python-branch'}) + exp.workon(function, max_trials=10) # orion.core.cli.main('init-only') # TODO: deprecate init_only diff --git a/tests/functional/branching/black_box_new.py b/tests/functional/branching/black_box_new.py index 26274dc53..4fa8aeacc 100755 --- a/tests/functional/branching/black_box_new.py +++ b/tests/functional/branching/black_box_new.py @@ -18,7 +18,7 @@ def execute(): # 1. Receive inputs as you want parser = argparse.ArgumentParser() parser.add_argument('-x', type=float, required=True) - parser.add_argument('--a-new', type=str, required=True) + parser.add_argument('--a-new', type=str) inputs = parser.parse_args() # 2. 
Perform computations diff --git a/tests/functional/branching/test_branching.py b/tests/functional/branching/test_branching.py index ea63bf8f9..be26516af 100644 --- a/tests/functional/branching/test_branching.py +++ b/tests/functional/branching/test_branching.py @@ -7,8 +7,14 @@ import pytest import orion.core.cli -from orion.core.io.evc_builder import EVCBuilder -from orion.core.worker.experiment import ExperimentView +import orion.core.io.experiment_builder as experiment_builder +from orion.storage.base import get_storage + + +def execute(command, assert_code=0): + """Execute orion command and return returncode""" + returncode = orion.core.cli.main(command.split(' ')) + assert returncode == assert_code @pytest.fixture @@ -27,13 +33,14 @@ def init_full_x_full_y(init_full_x): name = "full_x" branch = "full_x_full_y" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box_with_y.py " + ("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box_with_y.py " "-x~uniform(-10,10) " "-y~+uniform(-10,10,default_value=1)").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=1 -y=1".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-1 -y=1".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=1 -y=-1".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-1 -y=-1".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=1 -y=1".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-1 -y=1".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=1 -y=-1".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-1 -y=-1".format(branch=branch).split(" ")) @pytest.fixture @@ -42,11 +49,11 @@ def init_half_x_full_y(init_full_x_full_y): name = "full_x_full_y" branch = "half_x_full_y" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box_with_y.py " + ("init_only -n {branch} --branch-from {name} ./black_box_with_y.py " "-x~+uniform(0,10) " "-y~uniform(-10,10,default_value=1)").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=2 -y=2".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=2 -y=-2".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=2 -y=2".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=2 -y=-2".format(branch=branch).split(" ")) @pytest.fixture @@ -55,11 +62,11 @@ def init_full_x_half_y(init_full_x_full_y): name = "full_x_full_y" branch = "full_x_half_y" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box_with_y.py " + ("init_only -n {branch} --branch-from {name} ./black_box_with_y.py " "-x~uniform(-10,10) " "-y~+uniform(0,10,default_value=1)").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=3 -y=3".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-3 -y=3".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=3 -y=3".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-3 -y=3".format(branch=branch).split(" ")) @pytest.fixture @@ -67,13 +74,14 @@ def init_full_x_rename_y_z(init_full_x_full_y): """Rename y from full x full y to z""" name = "full_x_full_y" branch = 
"full_x_rename_y_z" - orion.core.cli.main(("init_only -n {name} --branch {branch} ./black_box_with_z.py " + orion.core.cli.main(("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box_with_z.py " "-x~uniform(-10,10) -y~>z -z~uniform(-10,10,default_value=1)" ).format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=4 -z=4".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-4 -z=4".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=4 -z=-4".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-4 -z=-4".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=4 -z=4".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-4 -z=4".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=4 -z=-4".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-4 -z=-4".format(branch=branch).split(" ")) @pytest.fixture @@ -81,11 +89,12 @@ def init_full_x_rename_half_y_half_z(init_full_x_half_y): """Rename y from full x half y to z""" name = "full_x_half_y" branch = "full_x_rename_half_y_half_z" - orion.core.cli.main(("init_only -n {name} --branch {branch} ./black_box_with_z.py " + orion.core.cli.main(("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box_with_z.py " "-x~uniform(-10,10) -y~>z -z~uniform(0,10,default_value=1)" ).format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=5 -z=5".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-5 -z=5".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=5 -z=5".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-5 -z=5".format(branch=branch).split(" ")) @pytest.fixture @@ -94,13 +103,14 @@ def init_full_x_rename_half_y_full_z(init_full_x_half_y): name = "full_x_half_y" branch = "full_x_rename_half_y_full_z" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box_with_z.py " + ("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box_with_z.py " "-x~uniform(-10,10) -y~>z " "-z~+uniform(-10,10,default_value=1)").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=6 -z=6".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-6 -z=6".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=6 -z=-6".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-6 -z=-6".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=6 -z=6".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-6 -z=6".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=6 -z=-6".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-6 -z=-6".format(branch=branch).split(" ")) @pytest.fixture @@ -109,10 +119,11 @@ def init_full_x_remove_y(init_full_x_full_y): name = "full_x_full_y" branch = "full_x_remove_y" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box.py " + ("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box.py " "-x~uniform(-10,10) -y~-").format(name=name, branch=branch).split(" ")) - 
orion.core.cli.main("insert -n {name} script -x=7".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-7".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=7".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-7".format(branch=branch).split(" ")) @pytest.fixture @@ -121,10 +132,11 @@ def init_full_x_remove_z(init_full_x_rename_y_z): name = "full_x_rename_y_z" branch = "full_x_remove_z" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box.py " + ("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box.py " "-x~uniform(-10,10) -z~-").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=8".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-8".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=8".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-8".format(branch=branch).split(" ")) @pytest.fixture @@ -133,10 +145,11 @@ def init_full_x_remove_z_default_4(init_full_x_rename_y_z): name = "full_x_rename_y_z" branch = "full_x_remove_z_default_4" orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box.py " + ("init_only -n {branch} --branch-from {name} --cli-change-type noeffect " + "./black_box.py " "-x~uniform(-10,10) -z~-4").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=9".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-9".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=9".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-9".format(branch=branch).split(" ")) @pytest.fixture @@ -145,10 +158,11 @@ def init_full_x_new_algo(init_full_x): name = "full_x" branch = "full_x_new_algo" orion.core.cli.main( - ("init_only -n {name} --branch {branch} --algorithm-change --config new_algo_config.yaml " + ("init_only -n {branch} --branch-from {name} " + "--algorithm-change --config new_algo_config.yaml " "./black_box.py -x~uniform(-10,10)").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=1.1".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-1.1".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=1.1".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-1.1".format(branch=branch).split(" ")) @pytest.fixture @@ -157,10 +171,26 @@ def init_full_x_new_cli(init_full_x): name = "full_x" branch = "full_x_new_cli" orion.core.cli.main( - ("init_only -n {name} --branch {branch} --cli-change-type noeffect ./black_box_new.py " + ("init_only -n {branch} --branch-from {name} --cli-change-type noeffect ./black_box_new.py " "-x~uniform(-10,10) --a-new argument").format(name=name, branch=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=1.2".format(name=branch).split(" ")) - orion.core.cli.main("insert -n {name} script -x=-1.2".format(name=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=1.2".format(branch=branch).split(" ")) + orion.core.cli.main("insert -n {branch} script -x=-1.2".format(branch=branch).split(" ")) + + +@pytest.fixture +def init_full_x_ignore_cli(init_full_x): + """Use the --non-monitored-arguments argument""" + name = "full_x_with_new_opt" + 
orion.core.cli.main(("init_only -n {name} --config orion_config.yaml ./black_box_new.py " + "-x~uniform(-10,10)").format(name=name).split(" ")) + orion.core.cli.main("insert -n {name} script -x=0".format(name=name).split(" ")) + + orion.core.cli.main( + ("init_only -n {name} --non-monitored-arguments a-new " + "--config orion_config.yaml ./black_box_new.py " + "-x~uniform(-10,10) --a-new argument").format(name=name).split(" ")) + orion.core.cli.main("insert -n {name} script -x=1.2".format(name=name).split(" ")) + orion.core.cli.main("insert -n {name} script -x=-1.2".format(name=name).split(" ")) @pytest.fixture @@ -179,7 +209,7 @@ def get_name_value_pairs(trials): pairs = [] for trial in trials: pairs.append([]) - for param in trial.params: + for param in trial._params: pairs[-1].append((param.name, param.value)) pairs[-1] = tuple(pairs[-1]) @@ -189,21 +219,20 @@ def get_name_value_pairs(trials): def test_init(init_full_x, create_db_instance): """Test if original experiment contains trial 0""" - experiment = ExperimentView('full_x') + experiment = experiment_builder.build_view(name='full_x') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 0), ), ) def test_full_x_full_y(init_full_x_full_y, create_db_instance): """Test if full x full y is properly initialized and can fetch original trial""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_full_y'}) + experiment = experiment_builder.build_view(name='full_x_full_y') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 1), ('/y', 1)), (('/x', -1), ('/y', 1)), (('/x', 1), ('/y', -1)), (('/x', -1), ('/y', -1))) - # pytest.set_trace() pairs = get_name_value_pairs(experiment.fetch_trials(with_evc_tree=True)) assert pairs == ((('/x', 0), ('/y', 1)), (('/x', 1), ('/y', 1)), @@ -214,7 +243,7 @@ def test_full_x_full_y(init_full_x_full_y, create_db_instance): def test_half_x_full_y(init_half_x_full_y, create_db_instance): """Test if half x full y is properly initialized and can fetch from its 2 parents""" - experiment = EVCBuilder().build_view_from({'name': 'half_x_full_y'}) + experiment = experiment_builder.build_view(name='half_x_full_y') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 2), ('/y', 2)), (('/x', 2), ('/y', -2))) @@ -229,7 +258,7 @@ def test_half_x_full_y(init_half_x_full_y, create_db_instance): def test_full_x_half_y(init_full_x_half_y, create_db_instance): """Test if full x half y is properly initialized and can fetch from its 2 parents""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_half_y'}) + experiment = experiment_builder.build_view(name='full_x_half_y') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 3), ('/y', 3)), (('/x', -3), ('/y', 3))) @@ -244,7 +273,7 @@ def test_full_x_half_y(init_full_x_half_y, create_db_instance): def test_full_x_rename_y_z(init_full_x_rename_y_z, create_db_instance): """Test if full x full z is properly initialized and can fetch from its 2 parents""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_rename_y_z'}) + experiment = experiment_builder.build_view(name='full_x_rename_y_z') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 4), ('/z', 4)), (('/x', -4), ('/z', 4)), @@ -265,7 +294,7 @@ def test_full_x_rename_y_z(init_full_x_rename_y_z, create_db_instance): def test_full_x_rename_half_y_half_z(init_full_x_rename_half_y_half_z, create_db_instance): """Test if full x half z is properly initialized and can 
fetch from its 3 parents""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_rename_half_y_half_z'}) + experiment = experiment_builder.build_view(name='full_x_rename_half_y_half_z') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 5), ('/z', 5)), (('/x', -5), ('/z', 5))) @@ -282,7 +311,7 @@ def test_full_x_rename_half_y_half_z(init_full_x_rename_half_y_half_z, create_db def test_full_x_rename_half_y_full_z(init_full_x_rename_half_y_full_z, create_db_instance): """Test if full x half->full z is properly initialized and can fetch from its 3 parents""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_rename_half_y_full_z'}) + experiment = experiment_builder.build_view(name='full_x_rename_half_y_full_z') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 6), ('/z', 6)), (('/x', -6), ('/z', 6)), @@ -303,7 +332,7 @@ def test_full_x_rename_half_y_full_z(init_full_x_rename_half_y_full_z, create_db def test_full_x_remove_y(init_full_x_remove_y, create_db_instance): """Test if full x removed y is properly initialized and can fetch from its 2 parents""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_remove_y'}) + experiment = experiment_builder.build_view(name='full_x_remove_y') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 7), ), (('/x', -7), )) @@ -313,7 +342,7 @@ def test_full_x_remove_y(init_full_x_remove_y, create_db_instance): def test_full_x_remove_z(init_full_x_remove_z, create_db_instance): """Test if full x removed z is properly initialized and can fetch from 2 of its 3 parents""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_remove_z'}) + experiment = experiment_builder.build_view(name='full_x_remove_z') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 8), ), (('/x', -8), )) @@ -326,7 +355,7 @@ def test_full_x_remove_z_default_4(init_full_x_remove_z_default_4, create_db_ins """Test if full x removed z (default 4) is properly initialized and can fetch from 1 of its 3 parents """ - experiment = EVCBuilder().build_view_from({'name': 'full_x_remove_z_default_4'}) + experiment = experiment_builder.build_view(name='full_x_remove_z_default_4') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 9), ), (('/x', -9), )) @@ -337,7 +366,7 @@ def test_full_x_remove_z_default_4(init_full_x_remove_z_default_4, create_db_ins def test_entire_full_x_full_y(init_entire, create_db_instance): """Test if full x full y can fetch from its parent and all children""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_full_y'}) + experiment = experiment_builder.build_view(name='full_x_full_y') pairs = get_name_value_pairs(experiment.fetch_trials()) assert pairs == ((('/x', 1), ('/y', 1)), (('/x', -1), ('/y', 1)), @@ -381,7 +410,7 @@ def test_entire_full_x_full_y(init_entire, create_db_instance): def test_run_entire_full_x_full_y(init_entire, create_db_instance): """Test if branched experiment can be executed without triggering a branching event again""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_full_y'}) + experiment = experiment_builder.build_view(name='full_x_full_y') assert len(experiment.fetch_trials(with_evc_tree=True)) == 23 assert len(experiment.fetch_trials()) == 4 @@ -396,7 +425,7 @@ def test_run_entire_full_x_full_y(init_entire, create_db_instance): def test_run_entire_full_x_full_y_no_args(init_entire, create_db_instance): """Test if branched experiment can be executed 
without script arguments""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_full_y'}) + experiment = experiment_builder.build_view(name='full_x_full_y') assert len(experiment.fetch_trials(with_evc_tree=True)) == 23 assert len(experiment.fetch_trials()) == 4 @@ -408,7 +437,7 @@ def test_run_entire_full_x_full_y_no_args(init_entire, create_db_instance): def test_new_algo(init_full_x_new_algo): """Test that new algo conflict is automatically resolved""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_new_algo'}) + experiment = experiment_builder.build_view(name='full_x_new_algo') assert len(experiment.fetch_trials(with_evc_tree=True)) == 3 assert len(experiment.fetch_trials()) == 2 @@ -418,21 +447,60 @@ def test_new_algo(init_full_x_new_algo): assert len(experiment.fetch_trials()) == 20 -def test_new_algo_not_resolved(init_full_x): +def test_new_algo_not_resolved(init_full_x, capsys): """Test that new algo conflict is not automatically resolved""" name = "full_x" branch = "full_x_new_algo" - with pytest.raises(ValueError) as exc: - orion.core.cli.main( - ("init_only -n {name} --branch {branch} --config new_algo_config.yaml " - "--manual-resolution ./black_box.py -x~uniform(-10,10)") - .format(name=name, branch=branch).split(" ")) - assert "Configuration is different and generates a branching event" in str(exc.value) + error_code = orion.core.cli.main( + ("init_only -n {branch} --branch-from {name} --config new_algo_config.yaml " + "--manual-resolution ./black_box.py -x~uniform(-10,10)") + .format(name=name, branch=branch).split(" ")) + assert error_code == 1 + + captured = capsys.readouterr() + assert captured.out == '' + assert "Configuration is different and generates a branching event" in captured.err + assert "gradient_descent" in captured.err + + +def test_ignore_cli(init_full_x_ignore_cli): + """Test that a non-monitored parameter conflict is not generating a child""" + name = "full_x" + orion.core.cli.main( + ("init_only -n {name} --non-monitored-arguments a-new " + "--manual-resolution ./black_box.py -x~uniform(-10,10)") + .format(name=name).split(" ")) + + +@pytest.mark.usefixtures('init_full_x', 'mock_infer_versioning_metadata') +def test_new_code_triggers_code_conflict(capsys): + """Test that a different git hash is generating a child""" + name = "full_x" + error_code = orion.core.cli.main( + ("init_only -n {name} " + "--manual-resolution ./black_box.py -x~uniform(-10,10)") + .format(name=name).split(" ")) + assert error_code == 1 + + captured = capsys.readouterr() + assert captured.out == '' + assert "Configuration is different and generates a branching event" in captured.err + assert "--code-change-type" in captured.err + + +@pytest.mark.usefixtures('init_full_x', 'mock_infer_versioning_metadata') +def test_new_code_ignores_code_conflict(): + """Test that a different git hash is *not* generating a child if --ignore-code-changes""" + name = "full_x" + orion.core.cli.main( + ("init_only -n {name} --ignore-code-changes " + "--manual-resolution ./black_box.py -x~uniform(-10,10)") + .format(name=name).split(" ")) def test_new_cli(init_full_x_new_cli): """Test that new cli conflict is automatically resolved""" - experiment = EVCBuilder().build_view_from({'name': 'full_x_new_cli'}) + experiment = experiment_builder.build_view(name='full_x_new_cli') assert len(experiment.fetch_trials(with_evc_tree=True)) == 3 assert len(experiment.fetch_trials()) == 2 @@ -452,7 +520,7 @@ def test_auto_resolution_does_resolve(init_full_x_full_y, monkeypatch): # If autoresolution 
was not successful, this would fail with a sys.exit without registering the # experiment orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box_with_y.py " + ("init_only -n {branch} --branch-from {name} ./black_box_with_y.py " "-x~uniform(0,10) " "-w~choices(['a','b'])").format(name=name, branch=branch).split(" ")) @@ -467,73 +535,73 @@ def test_auto_resolution_with_fidelity(init_full_x_full_y, monkeypatch): # If autoresolution was not successful, this would fail with a sys.exit without registering the # experiment orion.core.cli.main( - ("init_only -n {name} --branch {branch} ./black_box_with_y.py " - "-x~uniform(0,10) " + ("init_only -n {branch} --branch-from {name} ./black_box_with_y.py " + "-x~uniform(0,10, precision=None) " "-w~fidelity(1,10)").format(name=name, branch=branch).split(" ")) -def test_init_w_version_from_parent_w_children(clean_db, monkeypatch): +def test_init_w_version_from_parent_w_children(clean_db, monkeypatch, capsys): """Test that init of experiment from version with children fails.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1) " - "-y~+normal(0,1)".split(" ")) + execute("init_only -n experiment --config orion_config.yaml ./black_box.py -x~normal(0,1)") + execute("init_only -n experiment ./black_box.py -x~normal(0,1) -y~+normal(0,1)") - with pytest.raises(ValueError) as exc: - orion.core.cli.main("init_only -n experiment -v 1 ./black_box.py " - "-x~normal(0,1) -y~+normal(0,1) -z~normal(0,1)".split(" ")) + execute( + "init_only -n experiment -v 1 " + "./black_box.py -x~normal(0,1) -y~+normal(0,1) -z~normal(0,1)", + assert_code=1) - assert "Experiment name" in str(exc.value) + captured = capsys.readouterr() + assert captured.out == '' + assert "Configuration is different and generates a branching event" in captured.err + assert "Experiment name" in captured.err -def test_init_w_version_from_exp_wout_child(clean_db, monkeypatch, database): +def test_init_w_version_from_exp_wout_child(clean_db, monkeypatch): """Test that init of experiment from version without child works.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1) " - "-y~+normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment -v 2 ./black_box.py " - "-x~normal(0,1) -y~+normal(0,1) -z~+normal(0,1)".split(" ")) + execute("init_only -n experiment --config orion_config.yaml ./black_box.py -x~normal(0,1)") + execute("init_only -n experiment ./black_box.py -x~normal(0,1) -y~+normal(0,1)") + execute("init_only -n experiment -v 2 ./black_box.py " + "-x~normal(0,1) -y~+normal(0,1) -z~+normal(0,1)") - exp = database.experiments.find({'name': 'experiment', 'version': 3}) + exp = get_storage().fetch_experiments({'name': 'experiment', 'version': 3}) assert len(list(exp)) -def test_init_w_version_gt_max(clean_db, monkeypatch, database): +def test_init_w_version_gt_max(clean_db, monkeypatch): """Test that init of experiment from version higher than max works.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1) " - "-y~+normal(0,1)".split(" "))
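These tests also migrate reads from raw `database.experiments.find(...)` to the storage abstraction; the Mongo-style query dict keeps the same shape. A sketch of the lookup pattern the assertions rely on, assuming a storage singleton is already configured as the fixtures do (`experiment_version_exists` is a hypothetical helper, not part of Oríon):

```python
from orion.storage.base import get_storage


def experiment_version_exists(name, version):
    """Mirror the assertion pattern used in the tests above."""
    # Same Mongo-style filter: name + version pin down a single experiment.
    experiments = get_storage().fetch_experiments({'name': name, 'version': version})
    return len(list(experiments)) > 0
```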
orion.core.cli.main("init_only -n experiment -v 2000 ./black_box.py " - "-x~normal(0,1) -y~+normal(0,1) -z~+normal(0,1)".split(" ")) + execute("init_only -n experiment --config orion_config.yaml ./black_box.py -x~normal(0,1)") + execute("init_only -n experiment ./black_box.py -x~normal(0,1) -y~+normal(0,1)") + execute("init_only -n experiment -v 2000 ./black_box.py " + "-x~normal(0,1) -y~+normal(0,1) -z~+normal(0,1)") - exp = database.experiments.find({'name': 'experiment', 'version': 3}) + exp = get_storage().fetch_experiments({'name': 'experiment', 'version': 3}) assert len(list(exp)) -def test_init_check_increment_w_children(clean_db, monkeypatch, database): +def test_init_check_increment_w_children(clean_db, monkeypatch): """Test that incrementing version works with not same-named children.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment --branch experiment_2 ./black_box.py " - "-x~normal(0,1) -y~+normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment ./black_box.py " - "-x~normal(0,1) -z~+normal(0,1)".split(" ")) + execute("init_only -n experiment --config orion_config.yaml ./black_box.py -x~normal(0,1)") + execute("init_only -n experiment --branch-to experiment_2 ./black_box.py " + "-x~normal(0,1) -y~+normal(0,1)") + execute("init_only -n experiment ./black_box.py -x~normal(0,1) -z~+normal(0,1)") - exp = database.experiments.find({'name': 'experiment', 'version': 2}) + exp = get_storage().fetch_experiments({'name': 'experiment', 'version': 2}) assert len(list(exp)) -def test_branch_from_selected_version(clean_db, monkeypatch, database): +def test_branch_from_selected_version(clean_db, monkeypatch): """Test that branching from a version passed with `--version` works.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1)".split(" ")) - orion.core.cli.main("init_only -n experiment ./black_box.py -x~normal(0,1) -y~+normal(0,1)" - .split(" ")) - orion.core.cli.main("init_only -n experiment --version 1 -b experiment_2 ./black_box.py " - "-x~normal(0,1) -z~+normal(0,1)".split(" ")) - - parent = database.experiments.find({'name': 'experiment', 'version': 1})[0] - exp = database.experiments.find({'name': 'experiment_2'})[0] + execute("init_only -n experiment --config orion_config.yaml ./black_box.py -x~normal(0,1)") + execute("init_only -n experiment ./black_box.py -x~normal(0,1) -y~+normal(0,1)") + execute("init_only -n experiment --version 1 -b experiment_2 ./black_box.py " + "-x~normal(0,1) -z~+normal(0,1)") + + storage = get_storage() + parent = storage.fetch_experiments({'name': 'experiment', 'version': 1})[0] + exp = storage.fetch_experiments({'name': 'experiment_2'})[0] assert exp['refers']['parent_id'] == parent['_id'] diff --git a/tests/functional/client/black_box.py b/tests/functional/client/black_box.py new file mode 100644 index 000000000..4de16107a --- /dev/null +++ b/tests/functional/client/black_box.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Script that will always interrupt trials.""" +import argparse + +from orion.client import interrupt_trial, report_bad_trial, report_objective # noqa: F401 + + +def no_report(): + """Do not report any result""" + return + + +def execute(): + """Execute a simple pipeline as an example.""" + parser = argparse.ArgumentParser() + parser.add_argument('fct', 
+ parser.add_argument('--name', type=str) + parser.add_argument('--objective', type=str) + parser.add_argument('--data', type=str) + parser.add_argument('-x', type=float) + + inputs = parser.parse_args() + + kwargs = {} + + # Maybe it's a float; maybe the user made a mistake and reported objective='name' + try: + inputs.objective = float(inputs.objective) + except (ValueError, TypeError): + pass + + for key, value in vars(inputs).items(): + if value is not None: + kwargs[key] = value + + kwargs.pop('fct') + kwargs.pop('x') + + if 'data' in kwargs: + kwargs['data'] = [dict(name=kwargs['data'], type='constraint', value=1.0)] + + globals()[inputs.fct](**kwargs) + + +if __name__ == "__main__": + execute() diff --git a/tests/functional/client/orion_config.yaml b/tests/functional/client/orion_config.yaml new file mode 100644 index 000000000..c5c730617 --- /dev/null +++ b/tests/functional/client/orion_config.yaml @@ -0,0 +1,14 @@ +name: voila_voici + +pool_size: 1 +max_trials: 100 + +algorithms: random + +producer: + strategy: NoParallelStrategy + +database: + type: 'mongodb' + name: 'orion_test' + host: 'mongodb://user:pass@localhost' diff --git a/tests/functional/client/test_cli_client.py b/tests/functional/client/test_cli_client.py new file mode 100644 index 000000000..750c0fa51 --- /dev/null +++ b/tests/functional/client/test_cli_client.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Perform a functional test for client helper functions.""" +import os + +import pytest + +import orion.core.cli +from orion.core.worker.consumer import Consumer + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_interrupt(database, monkeypatch, capsys): + """Test interruption from within user script.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + error_code = orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", "interrupt_trial"] + user_args) + + assert error_code == 130 + + captured = capsys.readouterr() + assert captured.out == 'Orion is interrupted.\n' + assert captured.err == '' + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 1 + assert trials[0]['status'] == 'interrupted' + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_interrupt_diff_code(database, monkeypatch, capsys): + """Test interruption from within user script with custom int code""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + # Set local to 200 + orion.core.config.worker.interrupt_signal_code = 200 + + # But child won't be passed ORION_INTERRUPT_CODE and therefore will send default code 130 + def empty_env(self, trial, results_file=None): + return os.environ + + with monkeypatch.context() as m: + m.setattr(Consumer, 'get_execution_environment', empty_env) + + # Interrupt won't be interpreted properly and trials will be marked as broken + error_code = orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", "interrupt_trial"] + user_args) + + assert error_code == 0 + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 2 + assert trials[0]['status'] == 'broken'
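The broken-trial scenario above hinges on the interrupt protocol: the worker exports the configured code through the `ORION_INTERRUPT_CODE` environment variable, and the child signals interruption by exiting with that code (130, the usual SIGINT convention, by default). A sketch of the child-side convention; in practice `orion.client.interrupt_trial` already does this, so the helper below is only illustrative:

```python
import os
import sys


def interrupt(reason='stopping early'):
    """Exit with the interrupt code agreed upon with the Orion worker.

    Falls back to 130 when the worker did not export ORION_INTERRUPT_CODE,
    mirroring the behavior exercised in the test above.
    """
    print(reason, file=sys.stderr)
    sys.exit(int(os.environ.get('ORION_INTERRUPT_CODE', 130)))
```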
+ + # This time we use the true `get_execution_environment`, which properly passes the int code to the child. + error_code = orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", "interrupt_trial"] + user_args) + + assert error_code == 130 + + captured = capsys.readouterr() + assert 'Orion is interrupted.\n' in captured.out + assert captured.err == '' + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 3 + assert trials[-1]['status'] == 'interrupted' + + +# TODO: + +# test no call to any report + +# Add all this in DOC. + + +@pytest.mark.parametrize('fct', ['report_bad_trial', 'report_objective']) +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_report_no_name(database, monkeypatch, fct): + """Test report helper functions with default names""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", fct, "--objective", "1.0"] + user_args) + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 2 + assert trials[0]['status'] == 'completed' + assert trials[0]['results'][0]['name'] == 'objective' + assert trials[0]['results'][0]['type'] == 'objective' + assert trials[0]['results'][0]['value'] == 1.0 + + +@pytest.mark.parametrize('fct', ['report_bad_trial', 'report_objective']) +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_report_with_name(database, monkeypatch, fct): + """Test report helper functions with custom names""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", fct, "--objective", "1.0", "--name", "metric"] + user_args) + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 2 + assert trials[0]['status'] == 'completed' + assert trials[0]['results'][0]['name'] == 'metric' + assert trials[0]['results'][0]['type'] == 'objective' + assert trials[0]['results'][0]['value'] == 1.0 + + +@pytest.mark.parametrize('fct', ['report_bad_trial', 'report_objective']) +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_report_with_bad_objective(database, monkeypatch, fct): + """Test report helper functions with bad objective types""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + with pytest.raises(ValueError) as exc: + orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", fct, "--objective", "oh oh"] + user_args) + + assert 'must contain a type `objective` with type float/int' in str(exc.value) + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def 
test_report_with_bad_trial_no_objective(database, monkeypatch): + """Test bad trial report helper function with default objective.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", "report_bad_trial"] + user_args) + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 2 + assert trials[0]['status'] == 'completed' + assert trials[0]['results'][0]['name'] == 'objective' + assert trials[0]['results'][0]['type'] == 'objective' + assert trials[0]['results'][0]['value'] == 1e10 + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_report_with_bad_trial_with_data(database, monkeypatch): + """Test bad trial report helper function with additional data.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", "report_bad_trial", "--data", "another"] + user_args) + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) == 2 + assert trials[0]['status'] == 'completed' + assert trials[0]['results'][0]['name'] == 'objective' + assert trials[0]['results'][0]['type'] == 'objective' + assert trials[0]['results'][0]['value'] == 1e10 + + assert trials[0]['results'][1]['name'] == 'another' + assert trials[0]['results'][1]['type'] == 'constraint' + assert trials[0]['results'][1]['value'] == 1.0 + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_no_report(database, monkeypatch, capsys): + """Test script call without any results reported.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + errorcode = orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--worker-trials", "2", + "python", "black_box.py", 'no_report'] + user_args) + + assert errorcode == 1 + + captured = capsys.readouterr() + assert captured.out == "" + assert 'Cannot parse result file' in captured.err diff --git a/tests/functional/commands/conftest.py b/tests/functional/commands/conftest.py index e2c205f7b..49c53dab6 100644 --- a/tests/functional/commands/conftest.py +++ b/tests/functional/commands/conftest.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Common fixtures and utils for unittests and functional tests.""" +import copy import os from pymongo import MongoClient @@ -10,7 +11,7 @@ from orion.algo.base import (BaseAlgorithm, OptimizationAlgorithm) import orion.core.cli from orion.core.io.database import Database -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder import orion.core.utils.backward as backward from orion.core.worker.trial import Trial from orion.storage.base import get_storage @@ -90,7 +91,7 @@ def exp_config(): exp_config = list(yaml.safe_load_all(f)) for config in exp_config[0]: - backward.populate_priors(config['metadata']) + backward.populate_space(config) return exp_config @@ -163,7 +164,7 @@ def 
one_experiment(monkeypatch, db_instance): @pytest.fixture def one_experiment_changed_vcs(one_experiment): """Create an experiment without trials.""" - experiment = ExperimentBuilder().build_from({'name': one_experiment['name']}) + experiment = experiment_builder.build(name=one_experiment['name']) experiment.metadata['VCS'] = { 'type': 'git', 'is_dirty': False, 'HEAD_sha': 'new', 'active_branch': 'master', @@ -180,7 +181,7 @@ def one_experiment_no_version(monkeypatch, one_experiment): def fetch_without_version(query, selection=None): if query.get('name') == one_experiment['name'] or query == {}: - return [one_experiment] + return [copy.deepcopy(one_experiment)] return [] @@ -189,6 +190,14 @@ def fetch_without_version(query, selection=None): return one_experiment +@pytest.fixture +def with_experiment_using_python_api(monkeypatch, one_experiment): + """Create an experiment without trials.""" + experiment = experiment_builder.build(name='from-python-api', space={'x': 'uniform(0, 10)'}) + + return experiment + + @pytest.fixture def broken_refers(one_experiment, db_instance): """Create an experiment with broken refers.""" @@ -201,7 +210,7 @@ def single_without_success(one_experiment): statuses = list(Trial.allowed_stati) statuses.remove('completed') - exp = ExperimentBuilder().build_from({'name': 'test_single_exp'}) + exp = experiment_builder.build(name='test_single_exp') x = {'name': '/x', 'type': 'real'} x_value = 0 @@ -215,7 +224,7 @@ def single_without_success(one_experiment): @pytest.fixture def single_with_trials(single_without_success): """Create an experiment with all types of trials.""" - exp = ExperimentBuilder().build_from({'name': 'test_single_exp'}) + exp = experiment_builder.build(name='test_single_exp') x = {'name': '/x', 'type': 'real', 'value': 100} results = {"name": "obj", "type": "objective", "value": 0} @@ -232,7 +241,7 @@ def two_experiments(monkeypatch, db_instance): ensure_deterministic_id('test_double_exp', db_instance) orion.core.cli.main(['init_only', '-n', 'test_double_exp', - '--branch', 'test_double_exp_child', './black_box.py', + '--branch-to', 'test_double_exp_child', './black_box.py', '--x~+uniform(0,1,default_value=0)', '--y~+uniform(0,1,default_value=0)']) ensure_deterministic_id('test_double_exp_child', db_instance) @@ -240,8 +249,8 @@ def two_experiments(monkeypatch, db_instance): @pytest.fixture def family_with_trials(two_experiments): """Create two related experiments with all types of trials.""" - exp = ExperimentBuilder().build_from({'name': 'test_double_exp'}) - exp2 = ExperimentBuilder().build_from({'name': 'test_double_exp_child'}) + exp = experiment_builder.build(name='test_double_exp') + exp2 = experiment_builder.build(name='test_double_exp_child') x = {'name': '/x', 'type': 'real'} y = {'name': '/y', 'type': 'real'} @@ -260,7 +269,7 @@ def family_with_trials(two_experiments): @pytest.fixture def unrelated_with_trials(family_with_trials, single_with_trials): """Create two unrelated experiments with all types of trials.""" - exp = ExperimentBuilder().build_from({'name': 'test_double_exp_child'}) + exp = experiment_builder.build(name='test_double_exp_child') Database().remove('trials', {'experiment': exp.id}) Database().remove('experiments', {'_id': exp.id}) @@ -282,7 +291,7 @@ def three_experiments_with_trials(family_with_trials, single_with_trials): def three_experiments_family(two_experiments, db_instance): """Create three experiments, one of which is the parent of the other two.""" orion.core.cli.main(['init_only', '-n', 'test_double_exp', - 
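The fixtures in this conftest build experiments through the module-level `experiment_builder.build` helper rather than the old `ExperimentBuilder` class; it accepts either just a name or an inline space definition. A minimal sketch of the two call flavors used here, wrapped in a function since both require a configured storage backend:

```python
import orion.core.io.experiment_builder as experiment_builder


def build_fixture_experiments():
    """Reproduce the two build flavors used by the fixtures in this file."""
    # Name-only lookup of a commandline-defined experiment.
    exp = experiment_builder.build(name='test_single_exp')

    # Python-API creation from an inline space definition.
    exp2 = experiment_builder.build(name='from-python-api',
                                    space={'x': 'uniform(0, 10)'})
    return exp, exp2
```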
'--branch', 'test_double_exp_child2', './black_box.py', + '--branch-to', 'test_double_exp_child2', './black_box.py', '--x~+uniform(0,1,default_value=0)', '--z~+uniform(0,1,default_value=0)']) ensure_deterministic_id('test_double_exp_child2', db_instance) @@ -290,7 +299,7 @@ def three_experiments_family(two_experiments, db_instance): @pytest.fixture def three_family_with_trials(three_experiments_family, family_with_trials): """Create three experiments, all related, two direct children, with all types of trials.""" - exp = ExperimentBuilder().build_from({'name': 'test_double_exp_child2'}) + exp = experiment_builder.build(name='test_double_exp_child2') x = {'name': '/x', 'type': 'real'} z = {'name': '/z', 'type': 'real'} @@ -307,7 +316,7 @@ def three_family_with_trials(three_experiments_family, family_with_trials): def three_experiments_family_branch(two_experiments, db_instance): """Create three experiments, each parent of the following one.""" orion.core.cli.main(['init_only', '-n', 'test_double_exp_child', - '--branch', 'test_double_exp_grand_child', './black_box.py', + '--branch-to', 'test_double_exp_grand_child', './black_box.py', '--x~+uniform(0,1,default_value=0)', '--y~uniform(0,1,default_value=0)', '--z~+uniform(0,1,default_value=0)']) ensure_deterministic_id('test_double_exp_grand_child', db_instance) @@ -319,7 +328,7 @@ def three_family_branch_with_trials(three_experiments_family_branch, family_with with all types of trials. """ - exp = ExperimentBuilder().build_from({'name': 'test_double_exp_grand_child'}) + exp = experiment_builder.build(name='test_double_exp_grand_child') x = {'name': '/x', 'type': 'real'} y = {'name': '/y', 'type': 'real'} z = {'name': '/z', 'type': 'real'} diff --git a/tests/functional/commands/test_db_commands.py b/tests/functional/commands/test_db_commands.py new file mode 100644 index 000000000..e22d0869e --- /dev/null +++ b/tests/functional/commands/test_db_commands.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Perform a functional test of the db commands.""" +import pytest + +import orion.core.cli + + +def test_no_args(capsys): + """Test that help is printed when no args are given.""" + with pytest.raises(SystemExit): + orion.core.cli.main(['db']) + + captured = capsys.readouterr().out + + assert 'usage:' in captured + assert 'Traceback' not in captured diff --git a/tests/functional/commands/test_hunt_command.py b/tests/functional/commands/test_hunt_command.py new file mode 100644 index 000000000..cbbe1c8ae --- /dev/null +++ b/tests/functional/commands/test_hunt_command.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Perform a functional test of the hunt command.""" +import pytest + +import orion.core.cli + + +def test_hunt_no_prior(clean_db, one_experiment, capsys): + """Test at least one prior is specified""" + orion.core.cli.main(["hunt", "-n", "test", "./black_box.py"]) + + captured = capsys.readouterr().err + + assert "No prior found" in captured + assert 'Traceback' not in captured + + +def test_no_args(capsys): + """Test that help is printed when no args are given.""" + with pytest.raises(SystemExit): + orion.core.cli.main(['hunt']) + + captured = capsys.readouterr().out + + assert 'usage:' in captured + assert 'Traceback' not in captured + + +def test_no_name(capsys): + """Try to run the command without providing an experiment name""" + returncode = orion.core.cli.main(["hunt", "--exp-max-trials", "10"]) + assert returncode == 1 + + captured = capsys.readouterr().err + + assert captured == 'Error: 
No name provided for the experiment.\n' diff --git a/tests/functional/commands/test_info_command.py b/tests/functional/commands/test_info_command.py index 4cf30a4bb..0b7615a18 100644 --- a/tests/functional/commands/test_info_command.py +++ b/tests/functional/commands/test_info_command.py @@ -47,3 +47,33 @@ def test_info_no_branching(clean_db, one_experiment_changed_vcs, capsys): captured = capsys.readouterr().out assert '\nversion: 1\n' in captured + + +def test_info_python_api(clean_db, with_experiment_using_python_api, capsys): + """Test info if config built using python api""" + orion.core.cli.main(['info', '--name', 'from-python-api']) + + captured = capsys.readouterr().out + + assert 'from-python-api' in captured + assert 'Commandline' not in captured + + +def test_info_cmdline_api(clean_db, with_experiment_using_python_api, capsys): + """Test info if config built using cmdline api""" + orion.core.cli.main(['info', '--name', 'test_single_exp']) + + captured = capsys.readouterr().out + + assert 'test_single_exp' in captured + assert 'Commandline' in captured + + +def test_no_args(capsys): + """Try to run the command without any arguments""" + returncode = orion.core.cli.main(["info"]) + assert returncode == 1 + + captured = capsys.readouterr().err + + assert captured == 'Error: No name provided for the experiment.\n' diff --git a/tests/functional/commands/test_init_only_command.py b/tests/functional/commands/test_init_only_command.py new file mode 100644 index 000000000..71c8a51ac --- /dev/null +++ b/tests/functional/commands/test_init_only_command.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Perform a functional test of the init_only command.""" +import pytest + +import orion.core.cli + + +def test_no_args(capsys): + """Test that help is printed when no args are given.""" + with pytest.raises(SystemExit): + orion.core.cli.main(['init_only']) + + captured = capsys.readouterr().out + + assert 'usage:' in captured + assert 'Traceback' not in captured + + +def test_no_name(capsys): + """Try to run the command without providing an experiment name""" + returncode = orion.core.cli.main(["init_only", "--exp-max-trials", "10"]) + assert returncode == 1 + + captured = capsys.readouterr().err + + assert captured == 'Error: No name provided for the experiment.\n' diff --git a/tests/functional/commands/test_insert_command.py b/tests/functional/commands/test_insert_command.py index 28b6c104f..502aff42d 100644 --- a/tests/functional/commands/test_insert_command.py +++ b/tests/functional/commands/test_insert_command.py @@ -25,7 +25,7 @@ def test_insert_invalid_experiment(database, monkeypatch): orion.core.cli.main(["insert", "-n", "dumb_experiment", "-c", "./orion_config_random.yaml", "./black_box.py", "-x=1"]) - assert ("No experiment with given name 'dumb_experiment' for user 'corneau'" + assert ("No experiment with given name 'dumb_experiment' and version '*'" in str(exc_info.value)) @@ -171,3 +171,24 @@ def test_insert_with_version(create_db_instance, monkeypatch, script_path): trials = list(get_storage().fetch_trials(uid=exp['_id'])) assert len(trials) == 1 + + +def test_no_args(capsys): + """Test that help is printed when no args are given.""" + with pytest.raises(SystemExit): + orion.core.cli.main(['insert']) + + captured = capsys.readouterr().out + + assert 'usage:' in captured + assert 'Traceback' not in captured + + +def test_no_name(capsys): + """Try to run the command without providing an experiment name""" + returncode = orion.core.cli.main(["insert", 
"--version", "1"]) + assert returncode == 1 + + captured = capsys.readouterr().err + + assert captured == 'Error: No name provided for the experiment.\n' diff --git a/tests/functional/commands/test_list_command.py b/tests/functional/commands/test_list_command.py index 1f913af63..ed4bff4cc 100644 --- a/tests/functional/commands/test_list_command.py +++ b/tests/functional/commands/test_list_command.py @@ -13,7 +13,7 @@ def test_no_exp(monkeypatch, clean_db, capsys): captured = capsys.readouterr().out - assert captured == "" + assert captured == "No experiment found\n" def test_single_exp(clean_db, one_experiment, capsys): @@ -43,6 +43,15 @@ def test_broken_refers(clean_db, broken_refers, capsys): assert captured == " test_single_exp-v1\n" +def test_python_api(clean_db, with_experiment_using_python_api, capsys): + """Test list if containing exps from cmdline api and python api""" + orion.core.cli.main(['list']) + + captured = capsys.readouterr().out + + assert captured == " test_single_exp-v1\n from-python-api-v1\n" + + def test_two_exp(capsys, clean_db, two_experiments): """Test that experiment and child are printed.""" orion.core.cli.main(['list']) @@ -75,7 +84,7 @@ def test_no_exp_name(clean_db, three_experiments, monkeypatch, capsys): captured = capsys.readouterr().out - assert captured == "" + assert captured == "No experiment found\n" def test_exp_name(clean_db, three_experiments, monkeypatch, capsys): diff --git a/tests/functional/commands/test_status_command.py b/tests/functional/commands/test_status_command.py index 7b00c3abe..10e690f1c 100644 --- a/tests/functional/commands/test_status_command.py +++ b/tests/functional/commands/test_status_command.py @@ -30,6 +30,27 @@ def test_no_version_backward_compatible(clean_db, one_experiment_no_version, cap empty +""" + assert captured == expected + + +def test_python_api(clean_db, with_experiment_using_python_api, capsys): + """Test status with experiments built using python api.""" + orion.core.cli.main(['status']) + + captured = capsys.readouterr().out + + expected = """\ +test_single_exp-v1 +================== +empty + + +from-python-api-v1 +================== +empty + + """ assert captured == expected @@ -72,6 +93,35 @@ def test_experiment_wout_success_wout_ac(clean_db, single_without_success, capsy assert captured == expected +def test_experiment_number_same_list_status(clean_db, + single_without_success, capsys): + """Test status and list command output the consistent number of experiments""" + orion.core.cli.main(['status']) + + captured = capsys.readouterr().out + + expected = """\ +test_single_exp-v1 +================== +status quantity +----------- ---------- +broken 1 +interrupted 1 +new 1 +reserved 1 +suspended 1 + + +""" + assert captured == expected + + orion.core.cli.main(['list']) + + captured = capsys.readouterr().out + + assert captured == " test_single_exp-v1\n" + + def test_experiment_w_trials_wout_ac(clean_db, single_with_trials, capsys): """Test status with only one experiment and all trials.""" orion.core.cli.main(['status']) @@ -972,7 +1022,7 @@ def test_experiment_w_parent_w_name(clean_db, three_experiments_with_trials, cap completed 1 interrupted 1 new 2 -reserved 1 +reserved 2 suspended 1 diff --git a/tests/functional/demo/script_config.yaml b/tests/functional/demo/script_config.yaml index a41ceb1e2..e1ee43a3a 100644 --- a/tests/functional/demo/script_config.yaml +++ b/tests/functional/demo/script_config.yaml @@ -1 +1 @@ -x: 'orion~uniform(-50, 50)' +x: 'orion~uniform(-50, 50, precision=None)' diff --git 
a/tests/functional/demo/test_demo.py b/tests/functional/demo/test_demo.py index d31eb31cd..b34aca372 100644 --- a/tests/functional/demo/test_demo.py +++ b/tests/functional/demo/test_demo.py @@ -12,10 +12,12 @@ import yaml import orion.core.cli -from orion.core.io.experiment_builder import ExperimentBuilder -import orion.core.utils.backward as backward +from orion.core.io.database.ephemeraldb import EphemeralDB +import orion.core.io.experiment_builder as experiment_builder +from orion.core.utils.tests import OrionState from orion.core.worker import workon -from orion.core.worker.experiment import Experiment +from orion.storage.base import get_storage +from orion.storage.legacy import Legacy @pytest.mark.usefixtures("clean_db") @@ -42,8 +44,7 @@ def test_demo_with_default_algo_cli_config_only(database, monkeypatch): assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['-x~uniform(-50, 50)'] + assert exp['metadata']['user_args'] == ["./black_box.py", '-x~uniform(-50, 50)'] @pytest.mark.usefixtures("clean_db") @@ -53,7 +54,8 @@ def test_demo(database, monkeypatch): monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) user_args = [ - "-x~uniform(-50, 50)", + "./black_box.py", + "-x~uniform(-50, 50, precision=None)", "--test-env", "--experiment-id", '{exp.id}', "--experiment-name", '{exp.name}', @@ -62,7 +64,7 @@ "--working-dir", '{trial.working_dir}'] orion.core.cli.main([ - "hunt", "--config", "./orion_config.yaml", "./black_box.py"] + user_args) + "hunt", "--config", "./orion_config.yaml"] + user_args) exp = list(database.experiments.find({'name': 'voila_voici'})) assert len(exp) == 1 @@ -78,7 +80,6 @@ assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) assert exp['metadata']['user_args'] == user_args trials = list(database.trials.find({'experiment': exp_id})) assert len(trials) <= 15 @@ -122,8 +123,53 @@ def test_demo_with_script_config(database, monkeypatch): assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['--config', 'script_config.yaml'] + assert exp['metadata']['user_args'] == ['./black_box_w_config.py', '--config', + 'script_config.yaml'] + + trials = list(database.trials.find({'experiment': exp_id})) + assert len(trials) <= 15 + assert trials[-1]['status'] == 'completed' + trials = list(sorted(trials, key=lambda trial: trial['submit_time'])) + for result in trials[-1]['results']: + assert result['type'] != 'constraint' + if result['type'] == 'objective': + assert abs(result['value'] - 23.4) < 1e-6 + assert result['name'] == 'example_objective' + elif result['type'] == 'gradient': + res = numpy.asarray(result['value']) + assert 0.1 * numpy.sqrt(res.dot(res)) < 1e-7 + assert result['name'] == 'example_gradient' + params = trials[-1]['params'] + assert len(params) == 1 + assert params[0]['name'] == '/x' + assert params[0]['type'] == 'real' + assert abs(params[0]['value'] - 34.56789) < 1e-5 + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_demo_with_python_and_script(database, monkeypatch): + """Test a simple usage 
scenario.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + orion.core.cli.main(["hunt", "--config", "./orion_config.yaml", + "python", "black_box_w_config.py", "--config", "script_config.yaml"]) + + exp = list(database.experiments.find({'name': 'voila_voici'})) + assert len(exp) == 1 + exp = exp[0] + assert '_id' in exp + exp_id = exp['_id'] + assert exp['name'] == 'voila_voici' + assert exp['pool_size'] == 1 + assert exp['max_trials'] == 100 + assert exp['algorithms'] == {'gradient_descent': {'learning_rate': 0.1, + 'dx_tolerance': 1e-7}} + assert 'user' in exp['metadata'] + assert 'datetime' in exp['metadata'] + assert 'orion_version' in exp['metadata'] + assert 'user_script' in exp['metadata'] + assert exp['metadata']['user_args'] == ['python', 'black_box_w_config.py', + '--config', 'script_config.yaml'] trials = list(database.trials.find({'experiment': exp_id})) assert len(trials) <= 15 @@ -174,8 +220,7 @@ def test_demo_two_workers(database, monkeypatch): assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['-x~norm(34, 3)'] + assert exp['metadata']['user_args'] == ['./black_box.py', '-x~norm(34, 3)'] trials = list(database.trials.find({'experiment': exp_id})) status = defaultdict(int) @@ -189,11 +234,10 @@ def test_demo_two_workers(database, monkeypatch): assert params[0]['type'] == 'real' -@pytest.mark.usefixtures("create_db_instance") -def test_workon(database): +def test_workon(): """Test scenario having a configured experiment already setup.""" - experiment = Experiment('voila_voici') - config = experiment.configuration + name = 'voici_voila' + config = {'name': name} config['algorithms'] = { 'gradient_descent': { 'learning_rate': 0.1 @@ -201,48 +245,49 @@ def test_workon(database): } config['pool_size'] = 1 config['max_trials'] = 100 - config['metadata']['user_script'] = os.path.abspath(os.path.join( - os.path.dirname(__file__), "black_box.py")) - config['metadata']['user_args'] = ["-x~uniform(-50, 50)"] - backward.populate_priors(config['metadata']) - experiment.configure(config) - - workon(experiment) - - exp = list(database.experiments.find({'name': 'voila_voici'})) - assert len(exp) == 1 - exp = exp[0] - assert '_id' in exp - exp_id = exp['_id'] - assert exp['name'] == 'voila_voici' - assert exp['pool_size'] == 1 - assert exp['max_trials'] == 100 - assert exp['algorithms'] == {'gradient_descent': {'learning_rate': 0.1, - 'dx_tolerance': 1e-7}} - assert 'user' in exp['metadata'] - assert 'datetime' in exp['metadata'] - assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['-x~uniform(-50, 50)'] - - trials = list(database.trials.find({'experiment': exp_id})) - assert len(trials) <= 15 - trials = list(sorted(trials, key=lambda trial: trial['submit_time'])) - assert trials[-1]['status'] == 'completed' - for result in trials[-1]['results']: - assert result['type'] != 'constraint' - if result['type'] == 'objective': - assert abs(result['value'] - 23.4) < 1e-6 - assert result['name'] == 'example_objective' - elif result['type'] == 'gradient': - res = numpy.asarray(result['value']) - assert 0.1 * numpy.sqrt(res.dot(res)) < 1e-7 - assert result['name'] == 'example_gradient' - params = trials[-1]['params'] - assert len(params) == 1 - assert params[0]['name'] == '/x' - assert params[0]['type'] == 'real' - assert 
(params[0]['value'] - 34.56789) < 1e-5 + config['user_args'] = [ + os.path.abspath(os.path.join(os.path.dirname(__file__), "black_box.py")), + "-x~uniform(-50, 50, precision=None)"] + + with OrionState(): + experiment = experiment_builder.build_from_args(config) + + workon(experiment, 100, 100, 100, 100, 100) + + storage = get_storage() + + exp = list(storage.fetch_experiments({'name': name})) + assert len(exp) == 1 + exp = exp[0] + assert '_id' in exp + assert exp['name'] == name + assert exp['pool_size'] == 1 + assert exp['max_trials'] == 100 + assert exp['algorithms'] == {'gradient_descent': {'learning_rate': 0.1, + 'dx_tolerance': 1e-7}} + assert 'user' in exp['metadata'] + assert 'datetime' in exp['metadata'] + assert 'user_script' in exp['metadata'] + assert exp['metadata']['user_args'] == config['user_args'] + + trials = list(storage.fetch_trials(experiment)) + assert len(trials) <= 15 + trials = list(sorted(trials, key=lambda trial: trial.submit_time)) + assert trials[-1].status == 'completed' + for result in trials[-1].results: + assert result.type != 'constraint' + if result.type == 'objective': + assert abs(result.value - 23.4) < 1e-6 + assert result.name == 'example_objective' + elif result.type == 'gradient': + res = numpy.asarray(result.value) + assert 0.1 * numpy.sqrt(res.dot(res)) < 1e-7 + assert result.name == 'example_gradient' + params = trials[-1]._params + assert len(params) == 1 + assert params[0].name == '/x' + assert params[0].type == 'real' + assert abs(params[0].value - 34.56789) < 1e-5 @@ -253,7 +298,7 @@ def test_stress_unique_folder_creation(database, monkeypatch, tmpdir, capfd): """ # XXX: return and complete test when there is a way to control random # seed of Oríon - how_many = 50 + how_many = 2 monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) orion.core.cli.main(["hunt", "--max-trials={}".format(how_many), "--pool-size=1", @@ -318,7 +363,7 @@ def test_working_dir_argument_cmdline(database, monkeypatch, tmp_path): @pytest.mark.usefixtures("clean_db") @pytest.mark.usefixtures("null_db_instances") def test_tmpdir_is_deleted(database, monkeypatch, tmp_path): - """Check that a permanent directory is used instead of tmpdir""" + """Check that the temporary directory is deleted""" tmp_path = os.path.join(tempfile.gettempdir(), 'orion') if os.path.exists(tmp_path): shutil.rmtree(tmp_path) @@ -364,7 +409,6 @@ def test_run_with_name_only(database, monkeypatch): exp = list(database.experiments.find({'name': 'demo_random_search'})) assert len(exp) == 1 exp = exp[0] - print(exp['max_trials']) assert '_id' in exp exp_id = exp['_id'] trials = list(database.trials.find({'experiment': exp_id})) @@ -385,7 +429,6 @@ def test_run_with_name_only_with_trailing_whitespace(database, monkeypatch): exp = list(database.experiments.find({'name': 'demo_random_search'})) assert len(exp) == 1 exp = exp[0] - print(exp['max_trials']) assert '_id' in exp exp_id = exp['_id'] trials = list(database.trials.find({'experiment': exp_id})) @@ -474,7 +517,7 @@ def test_resilience(monkeypatch): orion.core.cli.main(["hunt", "--config", "./orion_config_random.yaml", "./broken_box.py", "-x~uniform(-50, 50)"]) - exp = ExperimentBuilder().build_from({'name': 'demo_random_search'}) + exp = experiment_builder.build(name='demo_random_search') assert len(exp.fetch_trials_by_status('broken')) == MAX_BROKEN @@ -484,11 +527,13 @@ def test_demo_with_shutdown_quickly(monkeypatch): """Check simple pipeline with random search is reasonably fast."""
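+    # NOTE (editorial, illustrative; not part of the original patch): the heartbeat override and the smaller --max-trials below trade coverage for speed, keeping the subprocess within the 40-second wait asserted at the end of this test.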
monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + monkeypatch.setattr(orion.core.config.worker, 'heartbeat', 120) + process = subprocess.Popen( - ["orion", "hunt", "--config", "./orion_config_random.yaml", "--max-trials", "30", + ["orion", "hunt", "--config", "./orion_config_random.yaml", "--max-trials", "10", "./black_box.py", "-x~uniform(-50, 50)"]) - assert process.wait(timeout=10) == 0 + assert process.wait(timeout=40) == 0 @@ -496,7 +541,7 @@ def test_demo_with_nondefault_config_keyword(database, monkeypatch): """Check that the user script configuration file is correctly used with a new keyword.""" monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) - orion.core.config.user_script_config = 'configuration' + orion.core.config.worker.user_script_config = 'configuration' orion.core.cli.main(["hunt", "--config", "./orion_config_other.yaml", "./black_box_w_config_other.py", "--configuration", "script_config.yaml"]) @@ -514,8 +559,8 @@ assert 'datetime' in exp['metadata'] assert 'orion_version' in exp['metadata'] assert 'user_script' in exp['metadata'] - assert os.path.isabs(exp['metadata']['user_script']) - assert exp['metadata']['user_args'] == ['--configuration', 'script_config.yaml'] + assert exp['metadata']['user_args'] == ['./black_box_w_config_other.py', '--configuration', + 'script_config.yaml'] trials = list(database.trials.find({'experiment': exp_id})) assert len(trials) <= 15 @@ -536,4 +581,57 @@ assert params[0]['type'] == 'real' assert (params[0]['value'] - 34.56789) < 1e-5 - orion.core.config.user_script_config = 'config' + orion.core.config.worker.user_script_config = 'config' + + +@pytest.mark.usefixtures("clean_db") +@pytest.mark.usefixtures("null_db_instances") +def test_demo_precision(database, monkeypatch): + """Test that sampled values honor the requested precision.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + orion.core.cli.main([ + "hunt", "--config", "./orion_config.yaml", "--max-trials", "2", + "./black_box.py"] + user_args) + + exp = list(database.experiments.find({'name': 'voila_voici'})) + exp = exp[0] + exp_id = exp['_id'] + trials = list(database.trials.find({'experiment': exp_id})) + trials = list(sorted(trials, key=lambda trial: trial['submit_time'])) + params = trials[-1]['params'] + value = params[0]['value'] + + assert value == float(numpy.format_float_scientific(value, precision=4)) + + +@pytest.mark.usefixtures("setup_pickleddb_database") +def test_debug_mode(monkeypatch): + """Test debug mode.""" + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + + user_args = [ + "-x~uniform(-50, 50, precision=5)"] + + orion.core.cli.main([ + "--debug", "hunt", "--config", "./orion_config.yaml", "--max-trials", "2", + "./black_box.py"] + user_args) + + storage = get_storage() + + assert isinstance(storage, Legacy) + assert isinstance(storage._db, EphemeralDB) + + +def test_no_args(capsys): + """Test that help is printed when no args are given.""" + with pytest.raises(SystemExit): + orion.core.cli.main([]) + + captured = capsys.readouterr().out + + assert 'usage:' in captured + assert 'Traceback' not in captured diff --git a/tests/functional/example/orion_config.yaml b/tests/functional/example/orion_config.yaml new file mode 100644 index 
000000000..19e95ab7b --- /dev/null +++ b/tests/functional/example/orion_config.yaml @@ -0,0 +1,7 @@ +name: scikit-iris-tutorial + +max_trials: 1 + +algorithms: + random: + seed: 1 diff --git a/tests/functional/example/test_scikit_learn.py b/tests/functional/example/test_scikit_learn.py new file mode 100644 index 000000000..774cc4d60 --- /dev/null +++ b/tests/functional/example/test_scikit_learn.py @@ -0,0 +1,63 @@ +"""Tests the minimalist example script on scikit-learn and its integration to Oríon.""" +import os +import subprocess + +import pytest + +from orion.client import create_experiment +import orion.core.cli +from orion.storage.base import get_storage + + +def test_script_integrity(capsys): + """Verifies the example script can run standalone via `python ...`.""" + script = os.path.abspath("examples/scikitlearn-iris/main.py") + + return_code = subprocess.call(["python", script, '0.1']) + + assert return_code != 2, "The example script does not exist." + assert return_code != 1, "The example script did not terminate its execution." + assert return_code == 0 and not capsys.readouterr().err, \ + "The example script encountered an error during its execution." + + +@pytest.mark.usefixtures("setup_pickleddb_database") +def test_orion_runs_script(monkeypatch): + """Verifies Oríon can execute the example script.""" + script = os.path.abspath("examples/scikitlearn-iris/main.py") + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + config = "orion_config.yaml" + + orion.core.cli.main(["hunt", "--config", config, "python", script, + "orion~choices([0.1])"]) + + experiment = create_experiment(name="scikit-iris-tutorial") + assert experiment is not None + assert experiment.version == 1 + + keys = experiment.space.keys() + assert len(keys) == 1 + assert '/_pos_2' in keys + + storage = get_storage() + trials = storage.fetch_trials(uid=experiment.id) + assert len(trials) == 1 + + trial = trials[0] + assert trial.status == 'completed' + assert trial.params['/_pos_2'] == 0.1 + + +@pytest.mark.usefixtures("setup_pickleddb_database") +def test_result_reproducibility(monkeypatch): + """Verifies the script results stay consistent (with respect to the documentation).""" + script = os.path.abspath("examples/scikitlearn-iris/main.py") + monkeypatch.chdir(os.path.dirname(os.path.abspath(__file__))) + config = "orion_config.yaml" + + orion.core.cli.main(["hunt", "--config", config, "python", script, + "orion~choices([0.1])"]) + + experiment = create_experiment(name="scikit-iris-tutorial") + assert 'best_evaluation' in experiment.stats + assert experiment.stats['best_evaluation'] == 0.6666666666666667 diff --git a/tests/functional/gradient_descent_algo/setup.py b/tests/functional/gradient_descent_algo/setup.py index c8361dae3..76e43a15a 100644 --- a/tests/functional/gradient_descent_algo/setup.py +++ b/tests/functional/gradient_descent_algo/setup.py @@ -47,7 +47,7 @@ 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] + [('Programming Language :: Python :: %s' % x) - for x in '3 3.4 3.5 3.6'.split()] + for x in '3 3.6 3.7 3.8'.split()] if __name__ == '__main__': setup(**setup_args) diff --git a/tests/requirements.txt b/tests/requirements.txt index b08c3dc90..7636cd189 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,4 @@ pytest >= 3.6.0 pytest-xdist pytest-timeout +git+git://github.com/Delaunay/track diff --git a/tests/stress/client/stress_experiment.py b/tests/stress/client/stress_experiment.py new file mode 100644 
index 000000000..3eefded52 --- /dev/null +++ b/tests/stress/client/stress_experiment.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Perform stress tests on the python API.""" +from multiprocessing import Pool +import os +import random +import time +import traceback + +import matplotlib.pyplot as plt +from pymongo import MongoClient + +from orion.client import create_experiment +from orion.core.io.database import DatabaseTimeout +from orion.core.utils.exceptions import SampleTimeout +from orion.core.utils.tests import update_singletons + + +DB_FILE = 'stress.pkl' + + +def f(x, worker): + """Sleep and return objective equal to param""" + print(f'{worker: 6d} {x: 5f}') + time.sleep(max(0, random.gauss(1, 0.2))) + return [dict(name='objective', value=x, type='objective')] + + +def get_experiment(storage, space_type, size): + """Create an experiment, or load it from the DB if it already exists + + Parameters + ---------- + storage: str + Can be `pickleddb` or `mongodb`. A default configuration is used for each. + space_type: str + Can be one of + - `discrete` Search space is discrete and limited to `max_trials` + - `real-seeded` Search space is continuous and the algo is seeded, leading to many race + conditions while algos are sampling the same points in parallel, or + - `real` Search space is real and the algo is not seeded, leading to very few race conditions. + size: int + This defines `max_trials`, and the size of the search space (`uniform(0, size)`). + + """ + if storage == 'pickleddb': + storage_config = { + 'type': 'pickleddb', + 'host': DB_FILE + } + elif storage == 'mongodb': + storage_config = { + 'type': 'mongodb', + 'name': 'stress', + 'host': 'mongodb://user:pass@localhost' + } + + discrete = space_type == 'discrete' + high = size # * 2 + + return create_experiment( + 'stress-test', + space={'x': f'uniform(0, {high}, discrete={discrete})'}, + max_trials=size, + max_idle_time=60 * 5, + algorithms={ + 'random': { + 'seed': None if space_type == 'real' else 1 + } + }, + storage={ + 'type': 'legacy', + 'database': storage_config + }) + + +def worker(worker_id, storage, space_type, size): + """Run trials until the experiment is done + + Parameters + ---------- + worker_id: int + ID of the worker. This is used to distinguish logs from different workers. + storage: str + See `get_experiment`. + space_type: str + See `get_experiment`. + size: int + See `get_experiment`. + + """ + num_trials = 0 + try: + experiment = get_experiment(storage, space_type, size) + + assert experiment.version == 1, experiment.version + + print(f'{worker_id: 6d} enters') + + while not experiment.is_done: + try: + trial = experiment.suggest() + except SampleTimeout: + trial = None + + if trial is None: + break + + results = f(trial.params['x'], worker_id) + num_trials += 1 + experiment.observe(trial, results=results) + + print(f'{worker_id: 6d} leaves | is done? {experiment.is_done}') + except DatabaseTimeout: + print(f'{worker_id: 6d} times out and leaves') + return num_trials + except Exception: + print(f'{worker_id: 6d} crashes') + traceback.print_exc() + return None + + return num_trials + + +def stress_test(storage, space_type, workers, size): + """Spawn workers and run stress test with verifications + + Parameters + ---------- + storage: str + See `get_experiment`. + space_type: str + See `get_experiment`. + workers: int + Number of workers to run in parallel. + size: int + See `get_experiment`. 
+ + Returns + ------- + `list` of `orion.core.worker.trial.Trial` + List of all trials at the end of the stress test + + """ + if storage == 'pickleddb': + if os.path.exists(DB_FILE): + os.remove(DB_FILE) + elif storage == 'mongodb': + client = MongoClient(username='user', password='pass', authSource='stress') + database = client.stress + database.experiments.drop() + database.lying_trials.drop() + database.trials.drop() + database.workers.drop() + database.resources.drop() + client.close() + update_singletons() + + print('Worker | Point') + + with Pool(workers) as p: + results = p.starmap( + worker, + zip(range(workers), + [storage] * workers, + [space_type] * workers, + [size] * workers)) + + assert None not in results, 'A worker crashed unexpectedly. See logs for the error messages.' + assert all(n > 0 for n in results), 'A worker could not execute any trial.' + + if space_type in ['discrete', 'real-seeded']: + assert sum(results) == size, results + else: + assert sum(results) >= size, results + + experiment = get_experiment(storage, space_type, size) + + trials = experiment.fetch_trials() + + if storage == 'pickleddb': + os.remove(DB_FILE) + elif storage == 'mongodb': + client = MongoClient(username='user', password='pass', authSource='stress') + database = client.stress + database.experiments.drop() + database.lying_trials.drop() + database.trials.drop() + database.workers.drop() + database.resources.drop() + client.close() + update_singletons() + + return trials + + +def get_timestamps(trials, size, space_type): + """Get start timestamps of the trials + + Parameters + ---------- + trials: `list` of `orion.core.worker.trial.Trial` + List of all trials at the end of the stress test + size: int + See `get_experiment`. + space_type: str + See `get_experiment`. + + Returns + ------- + (`list`, `list`) + Where rval[0] is the list of start timestamps and rval[1] the indexes of the trials. + For instance the i-th trial timestamp is rval[0][rval[1].index(i)]. + + """ + hparams = set() + x = [] + y = [] + + start_time = None + for i, trial in enumerate(trials): + hparams.add(trial.params['x']) + assert trial.objective.value == trial.params['x'] + if start_time is None: + start_time = trial.submit_time + x.append((trial.submit_time - start_time).total_seconds()) + y.append(i) + + if space_type in ['discrete', 'real-seeded']: + assert len(hparams) == size + else: + assert len(hparams) >= size + + return x[:size], y[:size] + + +def benchmark(workers, size): + """Run the stress test on every backend and space type + + Parameters + ---------- + workers: int + See `stress_test`. + size: int + See `get_experiment`. + + Returns + ------- + dict + Dictionary containing all results of all stress tests. + Each key is (backend, space_type). See `get_experiment` for the supported types + of `backend`s and `space_type`s. Each value results[(backend, space_type)] is + an (x, y) tuple, where x is the list of start timestamps and y the indexes of + the trials. See `get_timestamps` for more details. 
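+ + Example (editor's illustrative sketch, not part of the original patch): + ``results = benchmark(workers=4, size=10)`` then ``x, y = results[('pickleddb', 'discrete')]`` + gives start timestamps ``x`` (seconds) and trial indexes ``y``, ready for ``plt.plot(x, y)``. 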
+ + """ + results = {} + for backend in ['mongodb', 'pickleddb']: + for space_type in ['discrete', 'real', 'real-seeded']: + trials = stress_test(backend, space_type, workers, size) + results[(backend, space_type)] = get_timestamps(trials, size, space_type) + + return results + + +def main(): + """Run all stress tests and render the plot""" + size = 500 + + num_workers = [1, 4, 16, 32, 64, 128] + + fig, axis = plt.subplots( + len(num_workers), 1, figsize=(5, 1.8 * len(num_workers)), + gridspec_kw={'hspace': 0.01, 'wspace': 0}, + sharex='col') + + results = {} + + for i, workers in enumerate(num_workers): + + results[workers] = benchmark(workers, size) + + for backend in ['mongodb', 'pickleddb']: + for space_type in ['discrete', 'real', 'real-seeded']: + x, y = results[workers][(backend, space_type)] + axis[i].plot(x, y, label=f'{backend}-{space_type}') + + for i, workers in enumerate(num_workers): + # We pick 'pickleddb' and discrete=True as the reference for the slowest ones + x, y = results[min(num_workers)][('pickleddb', 'discrete')] + d_x = max(x) - min(x) + d_y = max(y) - min(y) + if i < len(num_workers) - 1: + axis[i].text(min(x) + d_x * 0.6, min(y) + d_y * 0.1, f'{workers: 3d} workers') + else: + axis[i].text(min(x) + d_x * 0.6, min(y) + d_y * 0.7, f'{workers: 3d} workers') + + for i in range(len(num_workers) - 1): + axis[i].spines['top'].set_visible(False) + axis[i].spines['right'].set_visible(False) + + axis[-1].spines['right'].set_visible(False) + axis[-1].spines['top'].set_visible(False) + + axis[-1].set_xlabel('Time (s)') + axis[-1].set_ylabel('Number of trials') + axis[-1].legend() + + plt.subplots_adjust(left=0.15, bottom=0.05, top=1, right=1) + + plt.savefig('test.png') + + +if __name__ == '__main__': + main() diff --git a/tests/stress/requirements.txt b/tests/stress/requirements.txt new file mode 100644 index 000000000..6ccafc3f9 --- /dev/null +++ b/tests/stress/requirements.txt @@ -0,0 +1 @@ +matplotlib diff --git a/tests/unittests/algo/test_asha.py b/tests/unittests/algo/test_asha.py index bc63ceb50..318d309dd 100644 --- a/tests/unittests/algo/test_asha.py +++ b/tests/unittests/algo/test_asha.py @@ -7,8 +7,8 @@ import numpy as np import pytest -from orion.algo.asha import ASHA, Bracket -from orion.algo.space import Fidelity, Real, Space +from orion.algo.asha import ASHA, Bracket, compute_budgets +from orion.algo.space import Fidelity, Integer, Real, Space @pytest.fixture @@ -66,6 +66,32 @@ def rung_2(rung_1): map(lambda v: (v[0], (9, v[0])), sorted(rung_1[1].values()))}) +def test_compute_budgets(): + """Verify proper computation of budgets on a logarithmic scale""" + # Check typical values + assert compute_budgets(1, 16, 4, 3) == [1, 4, 16] + # Check rounding (max_resources is not a multiple of reduction_factor) + assert compute_budgets(1, 30, 4, 3) == [1, 5, 30] + # Check rounding (min_resources may be rounded below its actual value) + assert compute_budgets(25, 1000, 2, 6) == [25, 52, 109, 229, 478, 1000] + # Check min_resources + assert compute_budgets(5, 125, 5, 3) == [5, 25, 125] + # Check num_rungs + assert compute_budgets(1, 16, 2, 5) == [1, 2, 4, 8, 16] + + +def test_compute_compressed_budgets(): + """Verify proper computation of budgets when scale is small and integer rounding creates + duplicates + """ + assert compute_budgets(1, 16, 2, 10) == [1, 2, 3, 4, 5, 6, 7, 9, 12, 16] + + with pytest.raises(ValueError) as exc: + compute_budgets(1, 2, 2, 10) + + assert 'Cannot build budgets below max_resources' in str(exc.value) + + class TestBracket(): """Tests for the 
`Bracket` class.""" @@ -370,6 +396,21 @@ def sample(num=1, seed=None): assert 'ASHA keeps sampling already existing points.' in str(exc.value) + def test_suggest_in_finite_cardinality(self): + """Test that suggest None when search space is empty""" + space = Space() + space.register(Integer('yolo1', 'uniform', 0, 6)) + space.register(Fidelity('epoch', 1, 9, 3)) + + asha = ASHA(space) + for i in range(6): + asha.observe([(1, i)], [{'objective': i}]) + + for i in range(2): + asha.observe([(3, i)], [{'objective': i}]) + + assert asha.suggest() is None + def test_suggest_promote(self, asha, bracket, rung_0): """Test that correct point is promoted and returned.""" asha.brackets = [bracket] diff --git a/tests/unittests/algo/test_base.py b/tests/unittests/algo/test_base.py index 85eaf3398..9477ef30e 100644 --- a/tests/unittests/algo/test_base.py +++ b/tests/unittests/algo/test_base.py @@ -3,6 +3,7 @@ """Example usage and tests for :mod:`orion.algo.base`.""" from orion.algo.base import BaseAlgorithm +from orion.algo.space import Integer, Real, Space def test_init(dumbalgo): @@ -69,3 +70,64 @@ def test_space_setter(dumbalgo): assert algo.naedw.value == 9 assert algo.naekei.space == 'etsh' assert algo.naekei.judgement == 10 + + +def test_state_dict(dumbalgo): + """Check whether trials_info is in the state dict""" + nested_algo = {'DumbAlgo': dict( + value=6, + scoring=5 + )} + algo = dumbalgo(8, value=1, subone=nested_algo) + algo.suggest() + assert not algo.state_dict['_trials_info'] + algo.observe([(1, 2)], [{'objective': 3}]) + assert len(algo.state_dict['_trials_info']) == 1 + algo.observe([(1, 2)], [{'objective': 3}]) + assert len(algo.state_dict['_trials_info']) == 1 + + +def test_is_done_cardinality(monkeypatch, dumbalgo): + """Check whether algorithm will stop with base algorithm cardinality check""" + monkeypatch.delattr(dumbalgo, 'is_done') + + space = Space() + space.register(Integer('yolo1', 'uniform', 1, 4)) + + algo = dumbalgo(space) + algo.suggest() + for i in range(1, 6): + algo.observe([[i]], [{'objective': 3}]) + + assert len(algo.state_dict['_trials_info']) == 5 + assert algo.is_done + + space = Space() + space.register(Real('yolo1', 'uniform', 1, 4)) + + algo = dumbalgo(space) + algo.suggest() + for i in range(1, 6): + algo.observe([[i]], [{'objective': 3}]) + + assert len(algo.state_dict['_trials_info']) == 5 + assert not algo.is_done + + +def test_is_done_max_trials(monkeypatch, dumbalgo): + """Check whether algorithm will stop with base algorithm max_trials check""" + monkeypatch.delattr(dumbalgo, 'is_done') + + space = Space() + space.register(Real('yolo1', 'uniform', 1, 4)) + + algo = dumbalgo(space) + algo.suggest() + for i in range(1, 5): + algo.observe([[i]], [{'objective': 3}]) + + assert len(algo.state_dict['_trials_info']) == 4 + assert not algo.is_done + + dumbalgo.max_trials = 4 + assert algo.is_done diff --git a/tests/unittests/algo/test_hyperband.py b/tests/unittests/algo/test_hyperband.py new file mode 100644 index 000000000..3755ca808 --- /dev/null +++ b/tests/unittests/algo/test_hyperband.py @@ -0,0 +1,756 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Tests for :mod:`orion.algo.hyperband`.""" + +import hashlib + +import numpy as np +import pytest + +from orion.algo.hyperband import Bracket, compute_budgets, Hyperband +from orion.algo.space import Fidelity, Integer, Real, Space + + +@pytest.fixture +def space(): + """Create a Space with a real dimension and a fidelity value.""" + space = Space() + space.register(Real('lr', 'uniform', 0, 1)) + 
space.register(Fidelity('epoch', 1, 9, 3)) + return space + + +@pytest.fixture +def budgets(): + """Return a configuration for a bracket.""" + return [(9, 1), (3, 3), (1, 9)] + + +@pytest.fixture +def hyperband(space): + """Return an instance of Hyperband.""" + return Hyperband(space, repetitions=1) + + +@pytest.fixture +def bracket(budgets, hyperband): + """Return a `Bracket` instance configured with `budgets`.""" + return Bracket(hyperband, budgets, 1) + + +@pytest.fixture +def rung_0(): + """Create fake points and objectives for rung 0.""" + points = np.linspace(0, 8, 9) + return dict( + n_trials=9, + resources=1, + results={hashlib.md5(str([point]).encode('utf-8')).hexdigest(): (point, (1, point)) + for point in points}) + + +@pytest.fixture +def rung_1(rung_0): + """Create fake points and objectives for rung 1.""" + values = map(lambda v: (v[0], (3, v[0])), list(sorted(rung_0['results'].values()))[:3]) + return dict( + n_trials=3, + resources=3, + results={hashlib.md5(str([value[0]]).encode('utf-8')).hexdigest(): value + for value in values}) + + +@pytest.fixture +def rung_2(rung_1): + """Create fake points and objectives for rung 2.""" + values = map(lambda v: (v[0], (9, v[0])), list(sorted(rung_1['results'].values()))[:1]) + return dict( + n_trials=1, + resources=9, + results={hashlib.md5(str([value[0]]).encode('utf-8')).hexdigest(): value + for value in values}) + + +def test_compute_budgets(): + """Verify proper computation of budgets on a logarithmic scale""" + # Check typical values + assert compute_budgets(81, 3) == [[(81, 1), (27, 3), (9, 9), (3, 27), (1, 81)], + [(27, 3), (9, 9), (3, 27), (1, 81)], + [(9, 9), (3, 27), (1, 81)], + [(6, 27), (2, 81)], [(5, 81)]] + assert compute_budgets(16, 4) == [[(16, 1), (4, 4), (1, 16)], [(4, 4), (1, 16)], [(3, 16)]] + assert compute_budgets(16, 5) == [[(5, 3), (1, 16)], [(2, 16)]] + + +class TestBracket(): + """Tests for the `Bracket` class.""" + + def test_rungs_creation(self, bracket): + """Test the creation of rungs for bracket 0.""" + assert len(bracket.rungs) == 3 + assert bracket.rungs[0] == dict(n_trials=9, resources=1, results=dict()) + assert bracket.rungs[1] == dict(n_trials=3, resources=3, results=dict()) + assert bracket.rungs[2] == dict(n_trials=1, resources=9, results=dict()) + + def test_register(self, hyperband, bracket): + """Check that a point is correctly registered inside a bracket.""" + bracket.hyperband = hyperband + point = (1, 0.0) + point_hash = hashlib.md5(str([0.0]).encode('utf-8')).hexdigest() + + bracket.register(point, 0.0) + + assert len(bracket.rungs[0]) + assert point_hash in bracket.rungs[0]['results'] + assert (0.0, point) == bracket.rungs[0]['results'][point_hash] + + def test_bad_register(self, hyperband, bracket): + """Check that a non-valid point is not registered.""" + bracket.hyperband = hyperband + + with pytest.raises(IndexError) as ex: + bracket.register((55, 0.0), 0.0) + + assert 'Bad fidelity level 55' in str(ex.value) + + def test_candidate_promotion(self, hyperband, bracket, rung_0): + """Test that correct point is promoted.""" + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + + points = bracket.get_candidates(0) + + assert points[0] == (1, 0.0) + + def test_promotion_with_rung_1_hit(self, hyperband, bracket, rung_0): + """Test that get_candidate gives us the next best thing if point is already in rung 1.""" + point = (1, 0.0) + point_hash = hashlib.md5(str([0.0]).encode('utf-8')).hexdigest() + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + 
bracket.rungs[1]['results'][point_hash] = (0.0, point) + + points = bracket.get_candidates(0) + + assert points[0] == (1, 1) + + def test_no_promotion_when_rung_full(self, hyperband, bracket, rung_0, rung_1): + """Test that get_candidates returns no candidates if rung 1 is full.""" + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + bracket.rungs[1] = rung_1 + + points = bracket.get_candidates(0) + + assert points == [] + + def test_no_promotion_if_not_completed(self, hyperband, bracket, rung_0): + """Test that get_candidates raises AssertionError if trials are not completed.""" + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + rung = bracket.rungs[0]['results'] + + # points = bracket.get_candidates(0) + + for p_id in rung.keys(): + rung[p_id] = (None, rung[p_id][1]) + + with pytest.raises(AssertionError): + bracket.get_candidates(0) + + def test_is_done(self, bracket, rung_0): + """Test that the `is_done` property works.""" + assert not bracket.is_done + + # Actual value of the point is not important here + bracket.rungs[2]['results'] = {'1': (1, 0.0), '2': (1, 0.0), '3': (1, 0.0)} + + assert bracket.is_done + + def test_update_rungs_return_candidate(self, hyperband, bracket, rung_1): + """Check if a valid modified candidate is returned by promote.""" + bracket.hyperband = hyperband + bracket.rungs[1] = rung_1 + point_hash = hashlib.md5(str([0.0]).encode('utf-8')).hexdigest() + + candidates = bracket.promote() + + assert point_hash in bracket.rungs[1]['results'] + assert bracket.rungs[1]['results'][point_hash] == (0.0, (3, 0.0)) + assert candidates[0][0] == 9 + + def test_update_rungs_return_no_candidate(self, hyperband, bracket, rung_1): + """Check if no candidate is returned by promote.""" + bracket.hyperband = hyperband + + candidate = bracket.promote() + + assert candidate is None + + def test_get_point_max_resource(self, hyperband, bracket, rung_0, rung_1, rung_2): + """Test to get the max resource R for a particular point""" + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + + assert bracket.get_point_max_resource(point=(1, 0.0)) == 1 + assert bracket.get_point_max_resource(point=(1, 8.0)) == 1 + + bracket.rungs[1] = rung_1 + assert bracket.get_point_max_resource(point=(1, 0.0)) == 3 + assert bracket.get_point_max_resource(point=(1, 8.0)) == 1 + + bracket.rungs[2] = rung_2 + assert bracket.get_point_max_resource(point=(1, 0.0)) == 9 + assert bracket.get_point_max_resource(point=(1, 8.0)) == 1 + + def test_repr(self, bracket, rung_0, rung_1, rung_2): + """Test the string representation of Bracket""" + bracket.rungs[0] = rung_0 + bracket.rungs[1] = rung_1 + bracket.rungs[2] = rung_2 + + assert str(bracket) == 'Bracket(resource=[1, 3, 9], repetition id=1)' + + +class TestHyperband(): + """Tests for the algo Hyperband.""" + + def test_register(self, hyperband, bracket, rung_0, rung_1): + """Check that a point is registered inside the bracket.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + bracket.rungs = [rung_0, rung_1] + point = (1, 0.0) + point_hash = hashlib.md5(str([0.0]).encode('utf-8')).hexdigest() + + hyperband.observe([point], [{'objective': 0.0}]) + + assert len(bracket.rungs[0]) + assert point_hash in bracket.rungs[0]['results'] + assert (0.0, point) == bracket.rungs[0]['results'][point_hash] + + def test_register_bracket_multi_fidelity(self, space): + """Check that a point is registered inside the same bracket for different fidelities.""" + hyperband = Hyperband(space) + + value = 50 + fidelity = 1 + point = (fidelity, value) + 
point_hash = hashlib.md5(str([value]).encode('utf-8')).hexdigest() + + hyperband.observe([point], [{'objective': 0.0}]) + + bracket = hyperband.brackets[0] + + assert len(bracket.rungs[0]) + assert point_hash in bracket.rungs[0]['results'] + assert (0.0, point) == bracket.rungs[0]['results'][point_hash] + + fidelity = 3 + point = [fidelity, value] + point_hash = hashlib.md5(str([value]).encode('utf-8')).hexdigest() + + hyperband.observe([point], [{'objective': 0.0}]) + + assert len(bracket.rungs[0]) + assert point_hash in bracket.rungs[1]['results'] + assert (0.0, point) != bracket.rungs[0]['results'][point_hash] + assert (0.0, point) == bracket.rungs[1]['results'][point_hash] + + def test_register_next_bracket(self, space): + """Check that a point is registered inside the right bracket when fidelity is higher.""" + hyperband = Hyperband(space) + + value = 50 + fidelity = 3 + point = (fidelity, value) + point_hash = hashlib.md5(str([value]).encode('utf-8')).hexdigest() + + hyperband.observe([point], [{'objective': 0.0}]) + + assert sum(len(rung['results']) for rung in hyperband.brackets[0].rungs) == 0 + assert sum(len(rung['results']) for rung in hyperband.brackets[1].rungs) == 1 + assert sum(len(rung['results']) for rung in hyperband.brackets[2].rungs) == 0 + assert point_hash in hyperband.brackets[1].rungs[0]['results'] + assert (0.0, point) == hyperband.brackets[1].rungs[0]['results'][point_hash] + + value = 51 + fidelity = 9 + point = (fidelity, value) + point_hash = hashlib.md5(str([value]).encode('utf-8')).hexdigest() + + hyperband.observe([point], [{'objective': 0.0}]) + + assert sum(len(rung['results']) for rung in hyperband.brackets[0].rungs) == 0 + assert sum(len(rung['results']) for rung in hyperband.brackets[1].rungs) == 1 + assert sum(len(rung['results']) for rung in hyperband.brackets[2].rungs) == 1 + assert point_hash in hyperband.brackets[2].rungs[0]['results'] + assert (0.0, point) == hyperband.brackets[2].rungs[0]['results'][point_hash] + + def test_register_invalid_fidelity(self, space): + """Check that a point cannot be registered if fidelity is invalid.""" + hyperband = Hyperband(space) + + value = 50 + fidelity = 2 + point = (fidelity, value) + + with pytest.raises(ValueError) as ex: + hyperband.observe([point], [{'objective': 0.0}]) + + assert 'No bracket found for point' in str(ex.value) + + def test_register_corrupted_db(self, caplog, space): + """Check that a warning is logged when points are registered in a different order than their fidelity.""" + hyperband = Hyperband(space) + + value = 50 + fidelity = 3 + point = (fidelity, value) + + hyperband.observe([point], [{'objective': 0.0}]) + assert 'Point registered to wrong bracket' not in caplog.text + + fidelity = 1 + point = [fidelity, value] + + caplog.clear() + hyperband.observe([point], [{'objective': 0.0}]) + assert 'Point registered to wrong bracket' in caplog.text + + def test_get_id(self, space): + """Test valid id of points""" + hyperband = Hyperband(space) + + assert hyperband.get_id(['whatever', 1]) == hyperband.get_id(['is here', 1]) + assert hyperband.get_id(['whatever', 1]) != hyperband.get_id(['is here', 2]) + assert hyperband.get_id(['whatever', 1], ignore_fidelity=False) != \ + hyperband.get_id(['is here', 1], ignore_fidelity=False) + assert hyperband.get_id(['whatever', 1], ignore_fidelity=False) != \ + hyperband.get_id(['is here', 2], ignore_fidelity=False) + assert hyperband.get_id(['same', 1], ignore_fidelity=False) == \ + hyperband.get_id(['same', 1], ignore_fidelity=False) + assert hyperband.get_id(['same', 1], 
ignore_fidelity=False) != \ + hyperband.get_id(['same', 1]) + + def test_get_id_multidim(self): + """Test valid id for points with dim of shape > 1""" + space = Space() + space.register(Fidelity('epoch', 1, 9, 3)) + space.register(Real('lr', 'uniform', 0, 1, shape=2)) + + hyperband = Hyperband(space) + + assert hyperband.get_id(['whatever', [1, 1]]) == hyperband.get_id(['is here', [1, 1]]) + assert hyperband.get_id(['whatever', [1, 1]]) != hyperband.get_id(['is here', [2, 2]]) + assert hyperband.get_id(['whatever', [1, 1]], ignore_fidelity=False) != \ + hyperband.get_id(['is here', [1, 1]], ignore_fidelity=False) + assert hyperband.get_id(['whatever', [1, 1]], ignore_fidelity=False) != \ + hyperband.get_id(['is here', [2, 2]], ignore_fidelity=False) + assert hyperband.get_id(['same', [1, 1]], ignore_fidelity=False) == \ + hyperband.get_id(['same', [1, 1]], ignore_fidelity=False) + assert hyperband.get_id(['same', [1, 1]], ignore_fidelity=False) != \ + hyperband.get_id(['same', [1, 1]]) + + def test_suggest_new(self, monkeypatch, hyperband, bracket, rung_0, rung_1, rung_2): + """Test that a new point is sampled.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + + def sample(num=1, seed=None): + return [('fidelity', i) for i in range(num)] + + monkeypatch.setattr(hyperband.space, 'sample', sample) + + points = hyperband.suggest() + + assert points[0] == (1.0, 0) + assert points[1] == (1.0, 1) + + def test_suggest_duplicates_between_calls(self, monkeypatch, hyperband, bracket): + """Test that same points are not allowed in different suggest calls of + the same hyperband execution. + """ + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + + duplicate_point = ('fidelity', 0.0) + new_point = ('fidelity', 0.5) + + duplicate_id = hashlib.md5(str([duplicate_point]).encode('utf-8')).hexdigest() + bracket.rungs[0]['results'] = {duplicate_id: (0.0, duplicate_point)} + + hyperband.trial_info_wo_fidelity[hyperband.get_id(duplicate_point)] = bracket + + points = [duplicate_point, new_point] + + def sample(num=1, seed=None): + return points + [('fidelity', i) for i in range(num - 2)] + + monkeypatch.setattr(hyperband.space, 'sample', sample) + + assert hyperband.suggest()[0][1] == new_point[1] + + def test_suggest_duplicates_one_call(self, monkeypatch, hyperband, bracket): + """Test that same points are not allowed in the same suggest call of + the same hyperband execution. 
+ """ + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + + zhe_point = [(1, 0.0), (1, 1.0), (1, 1.0), (1, 2.0)] + + def sample(num=1, seed=None): + return zhe_point * num + + monkeypatch.setattr(hyperband.space, 'sample', sample) + zhe_samples = hyperband.suggest() + + assert zhe_samples[0][1] == 0.0 + assert zhe_samples[1][1] == 1.0 + assert zhe_samples[2][1] == 2.0 + + zhe_point = [(3, 0.0), (3, 1.0), (3, 5.0), (3, 4.0)] + hyperband.trial_info_wo_fidelity[hyperband.get_id((1, 0.0))] = bracket + hyperband.trial_info_wo_fidelity[hyperband.get_id((1, 1.0))] = bracket + zhe_samples = hyperband.suggest() + assert zhe_samples[0][1] == 5.0 + assert zhe_samples[1][1] == 4.0 + + def test_suggest_duplicates_between_execution(self, monkeypatch, hyperband, budgets): + """Test that sampling collisions are handled between different hyperband execution.""" + hyperband.repetitions = 2 + bracket = Bracket(hyperband, budgets, 1) + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + + for i in range(9): + hyperband.observe([(1, i)], [{'objective': i}]) + + for i in range(3): + hyperband.observe([(3, i)], [{'objective': i}]) + + hyperband.observe([(9, 0)], [{'objective': 0}]) + + assert not hyperband.is_done + + zhe_point = [(9, 0), (9, 1), (9, 2)] + + def sample(num=1, seed=None): + return zhe_point * num + + monkeypatch.setattr(hyperband.space, 'sample', sample) + zhe_samples = hyperband.suggest() + assert zhe_samples == [(9, 1), (9, 2)] + + def test_suggest_inf_duplicates(self, monkeypatch, hyperband, bracket, rung_0, rung_1, rung_2): + """Test that sampling inf collisions will return None.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + + zhe_point = ('fidelity', 0.0) + hyperband.trial_info_wo_fidelity[hyperband.get_id(zhe_point)] = bracket + + def sample(num=1, seed=None): + return [zhe_point] * num + + monkeypatch.setattr(hyperband.space, 'sample', sample) + + assert hyperband.suggest() is None + + def test_suggest_in_finite_cardinality(self): + """Test that suggest None when search space is empty""" + space = Space() + space.register(Integer('yolo1', 'uniform', 0, 6)) + space.register(Fidelity('epoch', 1, 9, 3)) + + hyperband = Hyperband(space, repetitions=1) + for i in range(6): + hyperband.observe([(1, i)], [{'objective': i}]) + + assert hyperband.suggest() is None + + def test_suggest_promote(self, hyperband, bracket, rung_0): + """Test that correct point is promoted and returned.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + + points = hyperband.suggest() + + assert points == [(3, i) for i in range(3)] + + def test_is_filled(self, hyperband, bracket, rung_0, rung_1, rung_2): + """Test that Hyperband bracket detects when rung is filled.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + + rung = bracket.rungs[0]['results'] + trial_id = next(iter(rung.keys())) + objective, point = rung.pop(trial_id) + + assert not bracket.is_filled + assert not bracket.has_rung_filled(0) + + rung[trial_id] = (objective, point) + + assert bracket.is_filled + assert bracket.has_rung_filled(0) + assert not bracket.has_rung_filled(1) + assert not bracket.has_rung_filled(2) + + bracket.rungs[1] = rung_1 + + rung = bracket.rungs[1]['results'] + trial_id = next(iter(rung.keys())) + objective, point = rung.pop(trial_id) + + assert bracket.is_filled # Should depend first rung only + assert bracket.has_rung_filled(0) + assert not bracket.has_rung_filled(1) + + rung[trial_id] = 
(objective, point) + + assert bracket.is_filled # Should depend on first rung only + assert bracket.has_rung_filled(0) + assert bracket.has_rung_filled(1) + assert not bracket.has_rung_filled(2) + + bracket.rungs[2] = rung_2 + + rung = bracket.rungs[2]['results'] + trial_id = next(iter(rung.keys())) + objective, point = rung.pop(trial_id) + + assert bracket.is_filled # Should depend on first rung only + assert bracket.has_rung_filled(0) + assert bracket.has_rung_filled(1) + assert not bracket.has_rung_filled(2) + + rung[trial_id] = (objective, point) + + assert bracket.is_filled # Should depend on first rung only + assert bracket.has_rung_filled(0) + assert bracket.has_rung_filled(1) + assert bracket.has_rung_filled(2) + + def test_is_ready(self, hyperband, bracket, rung_0, rung_1, rung_2): + """Test that Hyperband bracket detects when rung is ready.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + bracket.rungs[0] = rung_0 + + rung = bracket.rungs[0]['results'] + trial_id = next(iter(rung.keys())) + objective, point = rung[trial_id] + rung[trial_id] = (None, point) + + assert not bracket.is_ready() + assert not bracket.is_ready(0) + + rung[trial_id] = (objective, point) + + assert bracket.is_ready() + assert bracket.is_ready(0) + assert not bracket.is_ready(1) + assert not bracket.is_ready(2) + + bracket.rungs[1] = rung_1 + + rung = bracket.rungs[1]['results'] + trial_id = next(iter(rung.keys())) + objective, point = rung[trial_id] + rung[trial_id] = (None, point) + + assert not bracket.is_ready() # Should depend on last rung that contains trials + assert bracket.is_ready(0) + assert not bracket.is_ready(1) + assert not bracket.is_ready(2) + + rung[trial_id] = (objective, point) + + assert bracket.is_ready() # Should depend on last rung that contains trials + assert bracket.is_ready(0) + assert bracket.is_ready(1) + assert not bracket.is_ready(2) + + bracket.rungs[2] = rung_2 + + rung = bracket.rungs[2]['results'] + trial_id = next(iter(rung.keys())) + objective, point = rung[trial_id] + rung[trial_id] = (None, point) + + assert not bracket.is_ready() # Should depend on last rung that contains trials + assert bracket.is_ready(0) + assert bracket.is_ready(1) + assert not bracket.is_ready(2) + + rung[trial_id] = (objective, point) + + assert bracket.is_ready() # Should depend on last rung that contains trials + assert bracket.is_ready(0) + assert bracket.is_ready(1) + assert bracket.is_ready(2) + + def test_suggest_opt_out(self, hyperband, bracket, rung_0, rung_1, rung_2): + """Test that Hyperband opts out when rungs are not ready.""" + hyperband.brackets = [bracket] + bracket.hyperband = hyperband + + bracket.rungs[0] = rung_0 + + trial_id = next(iter(rung_0['results'].keys())) + objective, point = rung_0['results'][trial_id] + rung_0['results'][trial_id] = (None, point) + + points = hyperband.suggest() + + assert points is None + + def test_seed_rng(self, hyperband): + """Test that algo is seeded properly""" + hyperband.seed_rng(1) + a = hyperband.suggest(1) + # Hyperband will always return the full first rung + assert np.allclose(a, hyperband.suggest(1)) + + hyperband.seed_rng(2) + assert not np.allclose(a, hyperband.suggest(1)) + + def test_set_state(self, hyperband): + """Test that state is reset properly""" + hyperband.seed_rng(1) + state = hyperband.state_dict + points = hyperband.suggest(1) + # Hyperband will always return the full first rung + assert np.allclose(points, hyperband.suggest(1)) + + hyperband.seed_rng(2) + assert not np.allclose(points, 
hyperband.suggest(1)) + + hyperband.set_state(state) + assert np.allclose(points, hyperband.suggest(1)) + + def test_full_process(self, monkeypatch, hyperband): + """Test Hyperband full process.""" + points = [('fidelity', i) for i in range(4000)] + + def sample(num=1, seed=None): + return points[:num] + + monkeypatch.setattr(hyperband.space, 'sample', sample) + + # Fill all brackets' first rung + + for i in range(3): + point = hyperband.suggest()[0] + assert point == (9, i) + hyperband.observe([point], [{'objective': None}]) + + for i in range(3): + point = hyperband.suggest()[0] + assert point == (3, i + 3) + hyperband.observe([point], [{'objective': None}]) + + for i in range(9): + point = hyperband.suggest()[0] + assert point == (1, i + 3 + 3) + hyperband.observe([point], [{'objective': None}]) + + assert hyperband.brackets[0].has_rung_filled(0) + assert not hyperband.brackets[0].is_ready() + assert hyperband.suggest() is None + assert hyperband.suggest() is None + + # Observe first bracket first rung + + for i in range(9): + hyperband.observe([(1, i + 3 + 3)], [{'objective': 16 - i}]) + + assert hyperband.brackets[0].is_ready() + assert not hyperband.brackets[1].is_ready() + assert not hyperband.brackets[2].is_ready() + + # Promote first bracket first rung + + for i in range(3): + point = hyperband.suggest()[0] + assert point == (3, 3 + 3 + 9 - 1 - i) + hyperband.observe([point], [{'objective': None}]) + + assert hyperband.brackets[0].has_rung_filled(1) + assert not hyperband.brackets[0].is_ready() + assert not hyperband.brackets[1].is_ready() + assert not hyperband.brackets[2].is_ready() + + # Observe first bracket second rung + + for i in range(3): + hyperband.observe([(3, 3 + 3 + 9 - 1 - i)], [{'objective': 8 - i}]) + + assert hyperband.brackets[0].is_ready() + assert not hyperband.brackets[1].is_ready() + assert not hyperband.brackets[2].is_ready() + + # Observe second bracket first rung + + for i in range(3): + hyperband.observe([(3, i + 3)], [{'objective': 8 - i}]) + + assert hyperband.brackets[0].is_ready() + assert hyperband.brackets[1].is_ready() + assert not hyperband.brackets[2].is_ready() + + # Promote second bracket first rung + + for i in range(1): + point = hyperband.suggest()[0] + assert point == (9, 3 + 3 - 1 - i) + hyperband.observe([point], [{'objective': None}]) + + assert hyperband.brackets[0].is_ready() + assert hyperband.brackets[1].has_rung_filled(1) + assert not hyperband.brackets[1].is_ready() + assert not hyperband.brackets[2].is_ready() + + # Observe third bracket first rung + + for i in range(3): + hyperband.observe([(9, i)], [{'objective': 3 - i}]) + + assert not hyperband.brackets[0].is_ready(2) + assert not hyperband.brackets[1].is_ready(1) + assert hyperband.brackets[2].is_ready(0) + assert hyperband.brackets[2].is_done + + # Observe second bracket second rung + + for i in range(1): + hyperband.observe([(9, 3 + 3 - 1 - i)], [{'objective': 5 - i}]) + + assert not hyperband.brackets[0].is_ready(2) + assert hyperband.brackets[1].is_ready(1) + assert hyperband.brackets[1].is_done + + # Observe first bracket third rung + + for i in range(1): + point = hyperband.suggest()[0] + assert point == (9, 3 + 3 + 9 - 1 - 2 + i) + hyperband.observe([point], [{'objective': 3 - i}]) + + assert hyperband.is_done + assert hyperband.brackets[0].is_ready(2) + assert hyperband.brackets[0].is_done + assert hyperband.suggest() is None + + # Refresh repeat and execution times property + monkeypatch.setattr(hyperband, 'repetitions', 2) + monkeypatch.setattr(hyperband, 
'executed_times', 0) + hyperband.observe([(9, 12)], [{'objective': 3 - i}]) + assert not hyperband.is_done + assert not hyperband.brackets[0].is_ready(2) + assert not hyperband.brackets[0].is_done + assert hyperband.suggest()[0] == (9, 3) diff --git a/tests/unittests/algo/test_random.py b/tests/unittests/algo/test_random.py index 1e85c8afe..86855ff93 100644 --- a/tests/unittests/algo/test_random.py +++ b/tests/unittests/algo/test_random.py @@ -43,3 +43,38 @@ def test_set_state(space): random_search.set_state(state) assert numpy.allclose(a, random_search.suggest(1)[0]) + + +def test_suggest_unique(): + """Verify that RandomSearch does not sample duplicates""" + space = Space() + space.register(Integer('yolo1', 'uniform', -3, 6)) + + random_search = Random(space) + + n_samples = 6 + values = sum(random_search.suggest(n_samples), tuple()) + assert len(values) == n_samples + assert len(set(values)) == n_samples + + +def test_suggest_unique_history(): + """Verify that RandomSearch does not sample duplicates of observed points""" + space = Space() + space.register(Integer('yolo1', 'uniform', -3, 6)) + + random_search = Random(space) + + n_samples = 3 + values = sum(random_search.suggest(n_samples), tuple()) + assert len(values) == n_samples + assert len(set(values)) == n_samples + + random_search.observe([[value] for value in values], [1] * n_samples) + + n_samples = 3 + new_values = sum(random_search.suggest(n_samples), tuple()) + assert len(new_values) == n_samples + assert len(set(new_values)) == n_samples + # No duplicates + assert (set(new_values) & set(values)) == set() diff --git a/tests/unittests/algo/test_space.py b/tests/unittests/algo/test_space.py index 1176a4aab..d2ef9d604 100644 --- a/tests/unittests/algo/test_space.py +++ b/tests/unittests/algo/test_space.py @@ -3,6 +3,7 @@ """Example usage and tests for :mod:`orion.algo.space`.""" from collections import (defaultdict, OrderedDict) +import sys import numpy as np from numpy.testing import assert_array_equal as assert_eq @@ -157,6 +158,56 @@ def test_no_prior(self): assert dim.prior is None assert dim._prior_name is 'None' + @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") + def test_get_prior_string(self): + """Test that prior string can be rebuilt.""" + dim = Dimension('yolo', 'alpha', 1, 2, 3, some='args', plus='fluff', n=4) + assert dim.get_prior_string() == 'alpha(1, 2, 3, some=\'args\', plus=\'fluff\', n=4)' + + def test_get_prior_string_uniform(self): + """Test special uniform args are handled properly.""" + dim = Dimension('yolo', 'uniform', 1, 2) + assert dim.get_prior_string() == 'uniform(1, 3)' + + def test_get_prior_string_default_values(self, monkeypatch): + """Test that default_value is included.""" + def contains(self, value): + return True + monkeypatch.setattr(Dimension, '__contains__', contains) + dim = Dimension('yolo', 'alpha', 1, 2, default_value=1) + assert dim.get_prior_string() == 'alpha(1, 2, default_value=1)' + + def test_get_prior_string_shape(self): + """Test that shape is included.""" + dim = Dimension('yolo', 'alpha', 1, 2, shape=(2, 3)) + assert dim.get_prior_string() == 'alpha(1, 2, shape=(2, 3))' + + def test_get_prior_string_loguniform(self): + """Test that special loguniform prior name is replaced properly.""" + dim = Dimension('yolo', 'reciprocal', 1e-10, 1) + assert dim.get_prior_string() == 'loguniform(1e-10, 1)' + + def test_prior_name(self): + """Test prior name is correct in dimension""" + dim = Dimension('yolo', 'reciprocal', 1e-10, 1) + assert 
dim.prior_name == 'reciprocal' + + dim = Dimension('yolo', 'norm', 0.9) + assert dim.prior_name == 'norm' + + dim = Real('yolo', 'uniform', 1, 2) + assert dim.prior_name == 'uniform' + + dim = Integer('yolo1', 'uniform', -3, 6) + assert dim.prior_name == 'int_uniform' + + dim = Integer('yolo1', 'norm', -3, 6) + assert dim.prior_name == 'int_norm' + + categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 'lalala': 0.4} + dim = Categorical('yolo', categories) + assert dim.prior_name == 'choices' + class TestReal(object): """Test methods of a `Real` object.""" @@ -268,6 +319,11 @@ def test_simple_instance(self, seed): assert dim.type == 'integer' assert dim.shape == () + def test_inclusive_intervals(self): + """Test that discretized bounds are valid""" + dim = Integer('yolo', 'uniform', -3, 5.5) + assert dim.interval() == (-3, 3) + def test_contains(self): """Check for integer test.""" dim = Integer('yolo', 'uniform', -3, 6) @@ -323,6 +379,11 @@ def test_cast_array(self): dim = Integer('yolo', 'uniform', -3, 4) assert np.all(dim.cast(np.array(['1', '2'])) == np.array([1, 2])) + def test_get_prior_string_discrete(self): + """Test that discrete is included.""" + dim = Integer('yolo', 'uniform', 1, 2) + assert dim.get_prior_string() == 'uniform(1, 3, discrete=True)' + class TestCategorical(object): """Test methods of a `Categorical` object.""" @@ -405,14 +466,12 @@ def test_bad_probabilities(self): with pytest.raises(ValueError): Categorical('yolo', categories, shape=2) - def test_interval_is_banned(self): + def test_interval(self): - """Check that calling `Categorical.interval` raises `RuntimeError`.""" + """Check that `Categorical.interval` returns all the categories.""" categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4} dim = Categorical('yolo', categories, shape=2) - with pytest.raises(RuntimeError) as exc: - dim.interval() - assert 'not ordered' in str(exc.value) + assert dim.interval() == ('asdfa', 2, 3, 4) def test_that_objects_types_are_ok(self): """Check that output samples are of the correct type. 
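The get_prior_string/prior_name tests above pin down a round-trip property: a dimension can reproduce the prior string that would rebuild it, with `reciprocal` aliased to `loguniform` and integer dimensions flagged as `discrete=True`. A minimal sketch of that round-trip, assuming only the orion.algo.space API exercised by these tests:

    from orion.algo.space import Integer, Real

    dim = Integer('yolo1', 'uniform', -3, 6)
    assert dim.prior_name == 'int_uniform'  # discrete variant of the prior
    # Rendered as (low, high) rather than scipy's (low, width) convention
    assert dim.get_prior_string() == 'uniform(-3, 3, discrete=True)'

    dim = Real('yolo', 'reciprocal', 1e-10, 1)
    assert dim.get_prior_string() == 'loguniform(1e-10, 1)'  # alias applied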
@@ -533,6 +592,8 @@ def test_sampling(self): assert dim.sample() == [2] dim = Fidelity('epoch', 1, 5) assert dim.sample() == [5] + dim = Fidelity('epoch', 1, 5) + assert dim.sample(4) == [5] * 4 def test_default_value(self): """Make sure Fidelity simply returns `high`""" @@ -648,6 +709,28 @@ def test_interval(self): assert space.interval() == [categories, (-3, 3), (-np.inf, np.inf)] + def test_cardinality(self): + """Check whether space capacity is correct""" + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ('asdfa', 2, 3, 4) + dim = Categorical('yolo', OrderedDict(zip(categories, probs)), shape=2) + space.register(dim) + dim = Integer('yolo2', 'uniform', -3, 6) + space.register(dim) + dim = Fidelity('epoch', 1, 9, 3) + space.register(dim) + + assert (4 * 2) * 6 * 1 == space.cardinality + + dim = Integer('yolo3', 'uniform', -3, 2, shape=(3, 1)) + space.register(dim) + assert (4 * 2) * 6 * 1 * (2 * 3 * 1) == space.cardinality + + dim = Real('yolo4', 'norm', 0.9) + space.register(dim) + assert np.inf == space.cardinality + def test_bad_setitem(self): """Check exceptions in setting items in Space.""" space = Space() @@ -725,3 +808,34 @@ def test_repr(self): "default value=None),\n"\ " Real(name=yolo3, prior={norm: (0.9,), {}}, shape=(), "\ "default value=None)])" + + def test_configuration(self): + """Test that configuration contains all dimensions.""" + space = Space() + space.register(Integer('yolo1', 'uniform', -3, 6, shape=(2,))) + space.register(Integer('yolo2', 'uniform', -3, 6, shape=(2,))) + space.register(Real('yolo3', 'norm', 0.9)) + space.register(Categorical('yolo4', ('asdfa', 2))) + + assert space.configuration == { + 'yolo1': 'uniform(-3, 3, shape=(2,), discrete=True)', + 'yolo2': 'uniform(-3, 3, shape=(2,), discrete=True)', + 'yolo3': 'norm(0.9)', + 'yolo4': 'choices([\'asdfa\', 2])'} + + def test_precision(self): + """Test that precision is correctly handled.""" + space = Space() + space.register(Real('yolo1', 'norm', 0.9, precision=6)) + space.register(Real('yolo2', 'norm', 0.9, precision=None)) + space.register(Real('yolo5', 'norm', 0.9)) + + assert space['yolo1'].precision == 6 + assert space['yolo2'].precision is None + assert space['yolo5'].precision == 4 + + with pytest.raises(TypeError): + space.register(Real('yolo3', 'norm', 0.9, precision=-12)) + + with pytest.raises(TypeError): + space.register(Real('yolo4', 'norm', 0.9, precision=0.6)) diff --git a/tests/unittests/algo/test_tpe.py b/tests/unittests/algo/test_tpe.py new file mode 100644 index 000000000..3f44adfdd --- /dev/null +++ b/tests/unittests/algo/test_tpe.py @@ -0,0 +1,659 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Tests for :mod:`orion.algo.tpe`.""" + +import numpy +import pytest +from scipy.stats import norm + +from orion.algo.space import Categorical, Fidelity, Integer, Real, Space +from orion.algo.tpe import adaptive_parzen_estimator, CategoricalSampler, \ + compute_max_ei_point, GMMSampler, ramp_up_weights, TPE + + +@pytest.fixture() +def space(): + """Return an optimization space""" + space = Space() + + dim1 = Real('yolo1', 'uniform', -10, 20) + space.register(dim1) + + dim2 = Integer('yolo2', 'uniform', -5, 10) + space.register(dim2) + + categories = ['a', 0.1, 2, 'c'] + dim3 = Categorical('yolo3', categories) + space.register(dim3) + + return space + + +@pytest.fixture +def tpe(space): + """Return an instance of TPE.""" + return TPE(space, seed=1) + + +def test_compute_max_ei_point(): + """Test that max ei point is computed correctly""" + points = numpy.linspace(-3, 3, 
num=10) + below_likelis = numpy.linspace(0.5, 0.9, num=10) + above_likes = numpy.linspace(0.2, 0.5, num=10) + + numpy.random.shuffle(below_likelis) + numpy.random.shuffle(above_likes) + max_ei_index = (below_likelis - above_likes).argmax() + + max_ei_point = compute_max_ei_point(points, below_likelis, above_likes) + assert max_ei_point == points[max_ei_index] + + +def test_ramp_up_weights(): + """Test that TPE adjusts the weights of observed points correctly""" + weights = ramp_up_weights(25, 15, True) + assert len(weights) == 25 + assert numpy.all(weights == 1.0) + + weights = ramp_up_weights(25, 15, False) + assert len(weights) == 25 + assert numpy.all(weights[:10] == (numpy.linspace(1.0 / 25, 1.0, num=10))) + assert numpy.all(weights[10:] == 1.0) + + weights = ramp_up_weights(10, 15, False) + assert len(weights) == 10 + assert numpy.all(weights == 1.0) + + weights = ramp_up_weights(25, 0, False) + assert len(weights) == 25 + assert numpy.all(weights == (numpy.linspace(1.0 / 25, 1.0, num=25))) + + +def test_adaptive_parzen_normal_estimator(): + """Test adaptive parzen estimator""" + low = -1 + high = 5 + + obs_mus = [1.2] + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=25) + assert list(mus) == [1.2, 2] + assert list(sigmas) == [3, 6] + assert list(weights) == [1.0 / 2, 1.0 / 2] + + obs_mus = [3.4] + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=0.5, + equal_weight=False, flat_num=25) + assert list(mus) == [2, 3.4] + assert list(sigmas) == [6, 3] + assert list(weights) == [0.5 / 1.5, 1.0 / 1.5] + + obs_mus = numpy.linspace(-1, 5, num=30, endpoint=False) + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=25) + + ramp = numpy.linspace(1.0 / 30, 1.0, num=30 - 25) + full = numpy.ones(25 + 1) + all_weights = (numpy.concatenate([ramp, full])) + + assert len(mus) == len(sigmas) == len(weights) == 30 + 1 + assert numpy.all(weights[:30 - 25] == ramp / all_weights.sum()) + assert numpy.all(weights[30 - 25:] == 1 / all_weights.sum()) + assert numpy.all(sigmas == 6 / 10) + + +def test_adaptive_parzen_normal_estimator_weight(): + """Test the weight for the normal components""" + obs_mus = numpy.linspace(-1, 5, num=30, endpoint=False) + low = -1 + high = 5 + + # equal weight + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=True, flat_num=25) + assert numpy.all(weights == 1 / 31) + assert numpy.all(sigmas == 6 / 10) + + # prior weight + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=0.5, + equal_weight=False, flat_num=25) + + ramp = numpy.linspace(1.0 / 30, 1.0, num=30 - 25) + full = numpy.ones(25 + 1) + all_weights = (numpy.concatenate([ramp, full])) + prior_pos = numpy.searchsorted(mus, 2) + all_weights[prior_pos] = 0.5 + + assert numpy.all(weights[:30 - 25] == (numpy.linspace(1.0 / 30, 1.0, num=30 - 25) / + all_weights.sum())) + assert numpy.all(weights[30 - 25:prior_pos] == 1 / all_weights.sum()) + assert weights[prior_pos] == 0.5 / all_weights.sum() + assert numpy.all(weights[prior_pos + 1:] == 1 / all_weights.sum()) + assert numpy.all(sigmas == 6 / 10) + + # full weights number + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=15) + + ramp = numpy.linspace(1.0 / 30, 1.0, num=30 - 15) + full = numpy.ones(15 + 1) + all_weights = (numpy.concatenate([ramp, full])) + prior_pos = 
numpy.searchsorted(mus, 2) + all_weights[prior_pos] = 1.0 + + assert numpy.all(weights[:30 - 15] == (numpy.linspace(1.0 / 30, 1.0, num=30 - 15) / + all_weights.sum())) + assert numpy.all(weights[30 - 15:] == 1 / all_weights.sum()) + assert numpy.all(sigmas == 6 / 10) + + +def test_adaptive_parzen_normal_estimator_sigma_clip(): + """Test the magic clip of sigmas for the parzen estimator""" + low = -1 + high = 5 + + obs_mus = numpy.linspace(-1, 5, num=8, endpoint=False) + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=25) + assert len(mus) == len(sigmas) == len(weights) == 8 + 1 + assert numpy.all(weights == 1 / 9) + assert numpy.all(sigmas == 6 / 8) + + obs_mus = numpy.random.uniform(-1, 5, 30) + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=25) + + assert len(mus) == len(sigmas) == len(weights) == 30 + 1 + assert numpy.all(weights[-25:] == weights[-1]) + assert numpy.all(sigmas <= 6) and numpy.all(sigmas >= 6 / 10) + + obs_mus = numpy.random.uniform(-1, 5, 400) + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=25) + + assert len(mus) == len(sigmas) == len(weights) == 400 + 1 + assert numpy.all(weights[-25:] == weights[-1]) + assert numpy.all(sigmas <= 6) and numpy.all(sigmas >= 6 / 20) + + obs_mus = numpy.random.uniform(-1, 5, 10000) + mus, sigmas, weights = adaptive_parzen_estimator(obs_mus, low, high, prior_weight=1.0, + equal_weight=False, flat_num=25) + + assert len(mus) == len(sigmas) == len(weights) == 10000 + 1 + assert numpy.all(weights[-25:] == weights[-1]) + assert numpy.all(sigmas <= 6) and numpy.all(sigmas >= 6 / 100) + + +class TestCategoricalSampler(): + """Tests for TPE Categorical Sampler""" + + def test_cat_sampler_creation(self, tpe): + """Test CategoricalSampler creation""" + obs = [0, 3, 9] + choices = list(range(-5, 5)) + cat_sampler = CategoricalSampler(tpe, obs, choices) + assert len(cat_sampler.weights) == len(choices) + + obs = [0, 3, 9] + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + assert len(cat_sampler.weights) == len(choices) + + tpe.equal_weight = True + tpe.prior_weight = 1.0 + obs = numpy.random.randint(0, 10, 100) + cat_sampler = CategoricalSampler(tpe, obs, choices) + counts_obs = numpy.bincount(obs) + 1.0 + weights = counts_obs / counts_obs.sum() + + assert numpy.all(cat_sampler.weights == weights) + + tpe.equal_weight = False + tpe.prior_weight = 0.5 + tpe.full_weight_num = 30 + obs = numpy.random.randint(0, 10, 100) + + cat_sampler = CategoricalSampler(tpe, obs, choices) + + ramp = numpy.linspace(1.0 / 100, 1.0, num=100 - 30) + full = numpy.ones(30) + ramp_weights = (numpy.concatenate([ramp, full])) + + counts_obs = numpy.bincount(obs, weights=ramp_weights) + 0.5 + weights = counts_obs / counts_obs.sum() + + assert numpy.all(cat_sampler.weights == weights) + + def test_sample(self, tpe): + """Test CategoricalSampler sample function""" + obs = numpy.random.randint(0, 10, 100) + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + points = cat_sampler.sample(25) + + assert len(points) == 25 + assert numpy.all(points >= 0) + assert numpy.all(points < 10) + + weights = numpy.linspace(1, 10, num=10) ** 3 + numpy.random.shuffle(weights) + weights = weights / weights.sum() + cat_sampler = CategoricalSampler(tpe, obs, choices) 
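The CategoricalSampler expectations above reduce to a small recipe: count the observed categories (ramping down the weight of older observations when there are more than `full_weight_num` of them), smooth the counts with the prior weight, and normalize into a distribution. A condensed sketch of the unramped case, using a hypothetical helper rather than orion's internals:

    import numpy

    def categorical_weights(obs, n_choices, prior_weight=1.0):
        # Observed counts, smoothed by the prior, normalized to sum to 1.
        counts = numpy.bincount(obs, minlength=n_choices) + prior_weight
        return counts / counts.sum()

    obs = numpy.random.randint(0, 10, 100)
    weights = categorical_weights(obs, 10)
    assert numpy.isclose(weights.sum(), 1.0)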
+ cat_sampler.weights = weights + + points = cat_sampler.sample(10000) + points = numpy.array(points) + hist = numpy.bincount(points) + + assert numpy.all(hist.argsort() == weights.argsort()) + assert len(points) == 10000 + assert numpy.all(points >= 0) + assert numpy.all(points < 10) + + def test_get_loglikelis(self, tpe): + """Test getting the log-likelihood of points""" + obs = numpy.random.randint(0, 10, 100) + choices = ['a', 'b', 11, 15, 17, 18, 19, 20, 25, 'c'] + cat_sampler = CategoricalSampler(tpe, obs, choices) + + points = cat_sampler.sample(25) + + likelis = cat_sampler.get_loglikelis(points) + + assert numpy.all(likelis == numpy.log(numpy.asarray(cat_sampler.weights)[points])) + + +class TestGMMSampler(): + """Tests for TPE GMM Sampler""" + + def test_gmm_sampler_creation(self, tpe): + """Test GMMSampler creation""" + mus = numpy.linspace(-3, 3, num=12, endpoint=False) + sigmas = [0.5] * 12 + + gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3) + + assert len(gmm_sampler.weights) == 12 + assert len(gmm_sampler.pdfs) == 12 + + def test_sample(self, tpe): + """Test GMMSampler sample function""" + mus = numpy.linspace(-3, 3, num=12, endpoint=False) + sigmas = [0.5] * 12 + + gmm_sampler = GMMSampler(tpe, mus, sigmas, -3, 3) + points = gmm_sampler.sample(25) + points = numpy.array(points) + + assert len(points) <= 25 + assert numpy.all(points >= -3) + assert numpy.all(points < 3) + + mus = numpy.linspace(-10, 10, num=10, endpoint=False) + sigmas = [0.00001] * 10 + weights = numpy.linspace(1, 10, num=10) ** 3 + numpy.random.shuffle(weights) + weights = weights / weights.sum() + + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights) + points = gmm_sampler.sample(10000) + points = numpy.array(points) + hist = numpy.histogram(points, bins=[-11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9]) + + assert numpy.all(hist[0].argsort() == numpy.array(weights).argsort()) + assert numpy.all(points >= -11) + assert numpy.all(points < 9) + + def test_get_loglikelis(self, tpe): + """Test getting the log-likelihood of points""" + mus = numpy.linspace(-10, 10, num=10, endpoint=False) + weights = numpy.linspace(1, 10, num=10) ** 3 + numpy.random.shuffle(weights) + weights = weights / weights.sum() + + sigmas = [0.00001] * 10 + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights) + + points = [mus[7]] + pdf = norm(mus[7], sigmas[7]) + point_likeli = numpy.log(pdf.pdf(mus[7]) * weights[7]) + likelis = gmm_sampler.get_loglikelis(points) + + assert list(likelis) == [point_likeli] + assert likelis[0] == point_likeli + + sigmas = [2] * 10 + gmm_sampler = GMMSampler(tpe, mus, sigmas, -11, 9, weights) + + log_pdf = [] + pdfs = [] + for i in range(10): + pdfs.append(norm(mus[i], sigmas[i])) + for pdf, weight in zip(pdfs, weights): + log_pdf.append(numpy.log(pdf.pdf(0) * weight)) + point_likeli = numpy.log(numpy.sum(numpy.exp(log_pdf))) + + points = numpy.random.uniform(-11, 9, 30) + points = numpy.insert(points, 10, 0) + likelis = gmm_sampler.get_loglikelis(points) + + point_likeli = numpy.format_float_scientific(point_likeli, precision=10) + gmm_likeli = numpy.format_float_scientific(likelis[10], precision=10) + assert point_likeli == gmm_likeli + assert len(likelis) == len(points) + + +class TestTPE(): + """Tests for the algo TPE.""" + + def test_seed_rng(self, tpe): + """Test that algo is seeded properly""" + tpe.seed_rng(1) + a = tpe.suggest(1)[0] + assert not numpy.allclose(a, tpe.suggest(1)[0]) + + tpe.seed_rng(1) + assert numpy.allclose(a, tpe.suggest(1)[0]) + + def test_set_state(self, tpe): + """Test that state is reset 
properly""" + tpe.seed_rng(1) + state = tpe.state_dict + a = tpe.suggest(1)[0] + assert not numpy.allclose(a, tpe.suggest(1)[0]) + + tpe.set_state(state) + assert numpy.allclose(a, tpe.suggest(1)[0]) + + def test_unsupported_space(self): + """Test tpe only work for supported search space""" + space = Space() + dim1 = Real('yolo1', 'uniform', -10, 10) + space.register(dim1) + dim2 = Real('yolo2', 'reciprocal', 10, 20) + space.register(dim2) + categories = ['a', 0.1, 2, 'c'] + dim3 = Categorical('yolo3', categories) + space.register(dim3) + dim4 = Fidelity('epoch', 1, 9, 3) + space.register(dim4) + TPE(space) + + space = Space() + dim = Real('yolo1', 'norm', 0.9) + space.register(dim) + + with pytest.raises(ValueError) as ex: + TPE(space) + + assert 'TPE now only supports uniform, loguniform, uniform discrete and choices' \ + in str(ex.value) + + space = Space() + dim = Real('yolo1', 'uniform', 0.9, shape=(2, 1)) + space.register(dim) + + with pytest.raises(ValueError) as ex: + TPE(space) + + assert 'TPE now only supports 1D shape' in str(ex.value) + + def test_split_trials(self, tpe): + """Test observed trials can be split based on TPE gamma""" + space = Space() + dim1 = Real('yolo1', 'uniform', -3, 6) + space.register(dim1) + + tpe.space = space + + points = numpy.linspace(-3, 3, num=10, endpoint=False) + results = numpy.linspace(0, 1, num=10, endpoint=False) + points_results = list(zip(points, results)) + numpy.random.shuffle(points_results) + points, results = zip(*points_results) + for point, result in zip(points, results): + tpe.observe([[point]], [{'objective': result}]) + + tpe.gamma = 0.25 + below_points, above_points = tpe.split_trials() + + assert below_points == [[-3.0], [-2.4], [-1.8]] + assert len(above_points) == 7 + + tpe.gamma = 0.2 + below_points, above_points = tpe.split_trials() + + assert below_points == [[-3.0], [-2.4]] + assert len(above_points) == 8 + + def test_sample_int_dimension(self): + """Test sample values for a integer dimension""" + space = Space() + dim1 = Integer('yolo1', 'uniform', -10, 20) + space.register(dim1) + + dim2 = Integer('yolo2', 'uniform', -5, 10, shape=(2)) + space.register(dim2) + + tpe = TPE(space) + + obs_points = numpy.random.randint(-10, 10, 100) + below_points = [obs_points[:25]] + above_points = [obs_points[25:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_int_point) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 10) + + obs_points_below = numpy.random.randint(-10, 0, 25).reshape(1, 25) + obs_points_above = numpy.random.randint(0, 10, 75).reshape(1, 75) + points = tpe.sample_one_dimension(dim1, 1, + obs_points_below, obs_points_above, tpe._sample_int_point) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 0) + + obs_points = numpy.random.randint(-5, 5, 100) + below_points = [obs_points[:25], obs_points[25:50]] + above_points = [obs_points[50:75], obs_points[75:]] + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_int_point) + points = numpy.asarray(points) + assert len(points) == 2 + assert all(points >= -10) + assert all(points < 10) + + tpe.n_ei_candidates = 0 + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_int_point) + assert len(points) == 0 + + def test_sample_categorical_dimension(self): + """Test sample values for a categorical dimension""" + space = Space() + categories = ['a', 'b', 11, 15, 
17, 18, 19, 20, 25, 'c'] + dim1 = Categorical('yolo1', categories) + space.register(dim1) + dim2 = Categorical('yolo2', categories, shape=(2)) + space.register(dim2) + + tpe = TPE(space) + + obs_points = numpy.random.randint(0, 10, 100) + obs_points = [categories[point] for point in obs_points] + below_points = [obs_points[:25]] + above_points = [obs_points[25:]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 1 + assert points[0] in categories + + obs_points_below = numpy.random.randint(0, 3, 25) + obs_points_above = numpy.random.randint(3, 10, 75) + below_points = [[categories[point] for point in obs_points_below]] + above_points = [[categories[point] for point in obs_points_above]] + points = tpe.sample_one_dimension(dim1, 1, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 1 + assert points[0] in categories[:3] + + obs_points = numpy.random.randint(0, 10, 100) + obs_points = [categories[point] for point in obs_points] + below_points = [obs_points[:25], obs_points[25:50]] + above_points = [obs_points[50:75], obs_points[75:]] + + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 2 + assert points[0] in categories + assert points[1] in categories + + tpe.n_ei_candidates = 0 + points = tpe.sample_one_dimension(dim2, 2, + below_points, above_points, tpe._sample_categorical_point) + assert len(points) == 0 + + def test_sample_real_dimension(self): + """Test sample values for a real dimension""" + space = Space() + dim1 = Real('yolo1', 'uniform', -10, 20) + space.register(dim1) + dim2 = Real('yolo2', 'uniform', -5, 10, shape=(2)) + space.register(dim2) + dim3 = Real('yolo3', 'reciprocal', 1, 20) + space.register(dim3) + + tpe = TPE(space) + points = numpy.random.uniform(-10, 10, 20) + below_points = [points[:8]] + above_points = [points[8:]] + points = tpe._sample_real_dimension(dim1, 1, + below_points, above_points) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 10) + + points = numpy.random.uniform(1, 20, 20) + below_points = [points[:8]] + above_points = [points[8:]] + points = tpe._sample_real_dimension(dim3, 1, + below_points, above_points) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= 1) + assert all(points < 20) + + below_points = numpy.random.uniform(-10, 0, 25).reshape(1, 25) + above_points = numpy.random.uniform(0, 10, 75).reshape(1, 75) + points = tpe._sample_real_dimension(dim1, 1, + below_points, above_points) + points = numpy.asarray(points) + assert len(points) == 1 + assert all(points >= -10) + assert all(points < 0) + + points = numpy.random.uniform(-5, 5, 32) + below_points = [points[:8], points[8:16]] + above_points = [points[16:24], points[24:]] + points = tpe._sample_real_dimension(dim2, 2, + below_points, above_points) + points = numpy.asarray(points) + assert len(points) == 2 + assert all(points >= -10) + assert all(points < 10) + + tpe.n_ei_candidates = 0 + points = tpe._sample_real_dimension(dim2, 2, + below_points, above_points) + assert len(points) == 0 + + def test_suggest(self, tpe): + """Test suggest with no shape dimensions""" + tpe.n_initial_points = 10 + results = numpy.random.random(10) + for i in range(10): + point = tpe.suggest(1) + assert len(point) == 1 + assert len(point[0]) == 3 + assert not isinstance(point[0][0], tuple) + tpe.observe(point, [{'objective': 
results[i]}]) + + point = tpe.suggest(1) + assert len(point) == 1 + assert len(point[0]) == 3 + assert not isinstance(point[0][0], tuple) + + def test_1d_shape(self, tpe): + """Test suggest with 1D shape dimensions""" + space = Space() + dim1 = Real('yolo1', 'uniform', -3, 6, shape=(2)) + space.register(dim1) + dim2 = Real('yolo2', 'uniform', -2, 4) + space.register(dim2) + + tpe.space = space + + tpe.n_initial_points = 10 + results = numpy.random.random(10) + for i in range(10): + point = tpe.suggest(1) + assert len(point) == 1 + assert len(point[0]) == 2 + assert len(point[0][0]) == 2 + tpe.observe(point, [{'objective': results[i]}]) + + point = tpe.suggest(1) + assert len(point) == 1 + assert len(point[0]) == 2 + assert len(point[0][0]) == 2 + + def test_suggest_initial_points(self, tpe, monkeypatch): + """Test that initial points can be sampled correctly""" + points = [(i, i - 6, 'c') for i in range(1, 12)] + + global index + index = 0 + + def sample(num=1, seed=None): + global index + pts = points[index:index + num] + index += num + return pts + + monkeypatch.setattr(tpe.space, 'sample', sample) + + tpe.n_initial_points = 10 + results = numpy.random.random(10) + for i in range(1, 11): + point = tpe.suggest(1)[0] + assert point == (i, i - 6, 'c') + tpe.observe([point], [{'objective': results[i - 1]}]) + + point = tpe.suggest(1)[0] + assert point != (11, 5, 'c') + + def test_suggest_ei_candidates(self, tpe): + """Test suggest with no shape dimensions""" + tpe.n_initial_points = 2 + tpe.n_ei_candidates = 0 + + results = numpy.random.random(2) + for i in range(2): + point = tpe.suggest(1) + assert len(point) == 1 + assert len(point[0]) == 3 + assert not isinstance(point[0][0], tuple) + tpe.observe(point, [{'objective': results[i]}]) + + point = tpe.suggest(1) + assert not point + + tpe.n_ei_candidates = 24 + point = tpe.suggest(1) + assert len(point) > 0 diff --git a/tests/unittests/client/test_client.py b/tests/unittests/client/test_client.py index 9e2ed20c0..11163217c 100644 --- a/tests/unittests/client/test_client.py +++ b/tests/unittests/client/test_client.py @@ -1,13 +1,61 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Example usage and tests for :mod:`orion.client`.""" - +import copy from importlib import reload import json import pytest -from orion import client +import orion.client +import orion.client.cli as cli +import orion.core +from orion.core.io.database.ephemeraldb import EphemeralDB +from orion.core.io.database.pickleddb import PickledDB +from orion.core.utils import SingletonNotInstantiatedError +from orion.core.utils.exceptions import BranchingEvent, NoConfigurationError, RaceCondition +from orion.core.utils.tests import OrionState, update_singletons +from orion.storage.base import get_storage +from orion.storage.legacy import Legacy + + +create_experiment = orion.client.create_experiment +workon = orion.client.workon + + +config = dict( + name='supernaekei', + space={'x': 'uniform(0, 200)'}, + metadata={'user': 'tsirif', + 'orion_version': 'XYZ', + 'VCS': {"type": "git", + "is_dirty": False, + "HEAD_sha": "test", + "active_branch": None, + "diff_sha": "diff"}}, + version=1, + pool_size=1, + max_trials=10, + working_dir='', + algorithms={'random': {'seed': 1}}, + producer={'strategy': 'NoParallelStrategy'}, + refers=dict( + root_id='supernaekei', + parent_id=None, + adapter=[]) + ) + + +@pytest.fixture() +def user_config(): + """Curate config as a user would provide it""" + user_config = copy.deepcopy(config) + user_config.pop('metadata') + 
user_config.pop('version') + user_config['strategy'] = user_config.pop('producer')['strategy'] + user_config.pop('refers') + user_config.pop('pool_size') + return user_config @pytest.fixture() @@ -25,7 +73,7 @@ def test_with_no_env(self, monkeypatch, capsys, data): Then: It should print `data` parameter instead to stdout. """ monkeypatch.delenv('ORION_RESULTS_PATH', raising=False) - reloaded_client = reload(client) + reloaded_client = reload(cli) assert reloaded_client.IS_ORION_ON is False assert reloaded_client.RESULTS_FILENAME is None @@ -45,7 +93,7 @@ def test_with_correct_env(self, monkeypatch, capsys, tmpdir, data): with open(path, mode='w'): pass monkeypatch.setenv('ORION_RESULTS_PATH', path) - reloaded_client = reload(client) + reloaded_client = reload(cli) assert reloaded_client.IS_ORION_ON is True assert reloaded_client.RESULTS_FILENAME == path @@ -69,7 +117,7 @@ def test_with_env_set_but_no_file_exists(self, monkeypatch, tmpdir, data): monkeypatch.setenv('ORION_RESULTS_PATH', path) with pytest.raises(RuntimeWarning) as exc: - reload(client) + reload(cli) assert "existing file" in str(exc.value) @@ -78,7 +126,7 @@ def test_call_interface_twice(self, monkeypatch, data): if function has already been called once. """ monkeypatch.delenv('ORION_RESULTS_PATH', raising=False) - reloaded_client = reload(client) + reloaded_client = reload(cli) reloaded_client.report_results(data) with pytest.raises(RuntimeWarning) as exc: @@ -88,3 +136,269 @@ def test_call_interface_twice(self, monkeypatch, data): assert reloaded_client.IS_ORION_ON is False assert reloaded_client.RESULTS_FILENAME is None assert reloaded_client._HAS_REPORTED_RESULTS is True + + +class TestCreateExperiment: + """Test creation of experiment with `client.create_experiment()`""" + + @pytest.mark.usefixtures("setup_pickleddb_database") + def test_create_experiment_no_storage(self, monkeypatch): + """Test creation if storage is not configured""" + name = 'oopsie_forgot_a_storage' + host = orion.core.config.storage.database.host + + with OrionState(storage=orion.core.config.storage.to_dict()) as cfg: + # Reset the Storage and drop instances so that get_storage() would fail. 
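# What this test leans on, in isolation: get_storage() raises
# SingletonNotInstantiatedError until create_experiment() instantiates the
# storage singleton from the global orion.core.config.storage. A hedged
# sketch of that contract (the 'demo' name is illustrative only):
#
#     experiment = create_experiment(name='demo', space={'x': 'uniform(0, 10)'})
#     storage = get_storage()  # now exists, e.g. Legacy over PickledDB
#     assert isinstance(storage, Legacy)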
+ cfg.cleanup() + cfg.singletons = update_singletons() + + # Make sure storage must be instantiated during `create_experiment()` + with pytest.raises(SingletonNotInstantiatedError): + get_storage() + + experiment = create_experiment(name=name, space={'x': 'uniform(0, 10)'}) + + assert isinstance(experiment._experiment._storage, Legacy) + assert isinstance(experiment._experiment._storage._db, PickledDB) + assert experiment._experiment._storage._db.host == host + + def test_create_experiment_new_no_space(self): + """Test that new experiment needs space""" + with OrionState(): + name = 'oopsie_forgot_a_space' + with pytest.raises(NoConfigurationError) as exc: + create_experiment(name=name) + + assert 'Experiment {} does not exist in DB'.format(name) in str(exc.value) + + def test_create_experiment_bad_storage(self): + """Test error message if storage is not configured properly""" + name = 'oopsie_bad_storage' + # Make sure there is no existing storage singleton + update_singletons() + + with pytest.raises(NotImplementedError) as exc: + create_experiment(name=name, storage={'type': 'legacy', + 'database': {'type': 'idontexist'}}) + + assert "Could not find implementation of AbstractDB, type = 'idontexist'" in str(exc.value) + + def test_create_experiment_new_default(self): + """Test creating a new experiment with all defaults""" + name = 'all_default' + space = {'x': 'uniform(0, 10)'} + with OrionState(): + experiment = create_experiment(name='all_default', space=space) + + assert experiment.name == name + assert experiment.space.configuration == space + + assert experiment.max_trials == orion.core.config.experiment.max_trials + assert experiment.working_dir == orion.core.config.experiment.working_dir + assert experiment.algorithms.configuration == {'random': {'seed': None}} + assert experiment.configuration['producer'] == {'strategy': 'MaxParallelStrategy'} + + def test_create_experiment_new_full_config(self, user_config): + """Test creating a new experiment by specifying all attributes.""" + with OrionState(): + experiment = create_experiment(**user_config) + + exp_config = experiment.configuration + + assert exp_config['space'] == config['space'] + assert exp_config['max_trials'] == config['max_trials'] + assert exp_config['working_dir'] == config['working_dir'] + assert exp_config['algorithms'] == config['algorithms'] + assert exp_config['producer'] == config['producer'] + + def test_create_experiment_hit_no_branch(self, user_config): + """Test creating an existing experiment by specifying all identical attributes.""" + with OrionState(experiments=[config]): + experiment = create_experiment(**user_config) + + exp_config = experiment.configuration + + assert experiment.name == config['name'] + assert experiment.version == 1 + assert exp_config['space'] == config['space'] + assert exp_config['max_trials'] == config['max_trials'] + assert exp_config['working_dir'] == config['working_dir'] + assert exp_config['algorithms'] == config['algorithms'] + assert exp_config['producer'] == config['producer'] + + def test_create_experiment_hit_no_config(self): + """Test creating an existing experiment by specifying the name only.""" + with OrionState(experiments=[config]): + experiment = create_experiment(config['name']) + + assert experiment.name == config['name'] + assert experiment.version == 1 + assert experiment.space.configuration == config['space'] + assert experiment.algorithms.configuration == config['algorithms'] + assert experiment.max_trials == config['max_trials'] + assert 
experiment.working_dir == config['working_dir'] + assert experiment.producer['strategy'].configuration == config['producer']['strategy'] + + def test_create_experiment_hit_branch(self): + """Test creating a differing experiment that causes branching.""" + with OrionState(experiments=[config]): + experiment = create_experiment(config['name'], space={'y': 'uniform(0, 10)'}) + + assert experiment.name == config['name'] + assert experiment.version == 2 + + assert experiment.algorithms.configuration == config['algorithms'] + assert experiment.max_trials == config['max_trials'] + assert experiment.working_dir == config['working_dir'] + assert experiment.producer['strategy'].configuration == config['producer']['strategy'] + + def test_create_experiment_race_condition(self, monkeypatch): + """Test that a single race condition is handled seamlessly + + RaceCondition during registration is already handled by `build()`, therefore we will only + test for race conditions during version update. + """ + with OrionState(experiments=[config]): + parent = create_experiment(config['name']) + child = create_experiment(config['name'], space={'y': 'uniform(0, 10)'}) + + def insert_race_condition(self, query): + is_auto_version_query = ( + query == {'name': config['name'], 'refers.parent_id': parent.id}) + if is_auto_version_query: + data = [child.configuration] + # First time the query returns no other child + elif insert_race_condition.count < 1: + data = [parent.configuration] + else: + data = [parent.configuration, child.configuration] + + insert_race_condition.count += int(is_auto_version_query) + + return data + + insert_race_condition.count = 0 + + monkeypatch.setattr(get_storage().__class__, 'fetch_experiments', + insert_race_condition) + + experiment = create_experiment(config['name'], space={'y': 'uniform(0, 10)'}) + + assert insert_race_condition.count == 1 + assert experiment.version == 2 + assert experiment.configuration == child.configuration + + def test_create_experiment_race_condition_broken(self, monkeypatch): + """Test that two or more race conditions lead to an exception""" + with OrionState(experiments=[config]): + parent = create_experiment(config['name']) + child = create_experiment(config['name'], space={'y': 'uniform(0, 10)'}) + + def insert_race_condition(self, query): + is_auto_version_query = ( + query == {'name': config['name'], 'refers.parent_id': parent.id}) + if is_auto_version_query: + data = [child.configuration] + # The query returns no other child, never! 
+ else: + data = [parent.configuration] + + insert_race_condition.count += int(is_auto_version_query) + + return data + + insert_race_condition.count = 0 + + monkeypatch.setattr(get_storage().__class__, 'fetch_experiments', + insert_race_condition) + + with pytest.raises(RaceCondition) as exc: + create_experiment(config['name'], space={'y': 'uniform(0, 10)'}) + + assert insert_race_condition.count == 2 + assert 'There was a race condition during branching and new version' in str(exc.value) + + def test_create_experiment_hit_manual_branch(self): + """Test creating a differing experiment that causes branching.""" + new_space = {'y': 'uniform(0, 10)'} + with OrionState(experiments=[config]): + create_experiment(config['name'], space=new_space) + + with pytest.raises(BranchingEvent) as exc: + create_experiment(config['name'], version=1, space=new_space) + + assert "Configuration is different and generates" in str(exc.value) + + def test_create_experiment_debug_mode(self): + """Test that EphemeralDB is used in debug mode regardless of the storage config given""" + update_singletons() + + create_experiment( + config['name'], space={'x': 'uniform(0, 10)'}, + storage={'type': 'legacy', 'database': {'type': 'pickleddb'}}) + + storage = get_storage() + + assert isinstance(storage, Legacy) + assert isinstance(storage._db, PickledDB) + + update_singletons() + + create_experiment( + config['name'], space={'x': 'uniform(0, 10)'}, + storage={'type': 'legacy', 'database': {'type': 'pickleddb'}}, debug=True) + + storage = get_storage() + + assert isinstance(storage, Legacy) + assert isinstance(storage._db, EphemeralDB) + + +class TestWorkon: + """Test the helper function for sequential API""" + + def test_workon(self): + """Verify that workon processes properly""" + def foo(x): + return [dict(name='result', type='objective', value=x * 2)] + + experiment = workon(foo, space={'x': 'uniform(0, 10)'}, max_trials=5) + assert len(experiment.fetch_trials()) == 5 + assert experiment.name == 'loop' + assert isinstance(experiment._experiment._storage, Legacy) + assert isinstance(experiment._experiment._storage._db, EphemeralDB) + + def test_workon_algo(self): + """Verify that algo config is processed properly""" + def foo(x): + return [dict(name='result', type='objective', value=x * 2)] + + experiment = workon( + foo, space={'x': 'uniform(0, 10)'}, max_trials=5, + algorithms={'random': {'seed': 5}}) + + assert experiment.algorithms.algorithm.seed == 5 + + def test_workon_name(self): + """Verify setting the name with workon""" + def foo(x): + return [dict(name='result', type='objective', value=x * 2)] + + experiment = workon(foo, space={'x': 'uniform(0, 10)'}, max_trials=5, name='voici') + + assert experiment.name == 'voici' + + def test_workon_twice(self): + """Verify that each experiment has its own storage""" + def foo(x): + return [dict(name='result', type='objective', value=x * 2)] + + experiment = workon(foo, space={'x': 'uniform(0, 10)'}, max_trials=5, name='voici') + + assert experiment.name == 'voici' + assert len(experiment.fetch_trials()) == 5 + + experiment2 = workon(foo, space={'x': 'uniform(0, 10)'}, max_trials=1, name='voici') + + assert experiment2.name == 'voici' + assert len(experiment2.fetch_trials()) == 1 diff --git a/tests/unittests/client/test_experiment_client.py b/tests/unittests/client/test_experiment_client.py new file mode 100644 index 000000000..b50e4353d --- /dev/null +++ b/tests/unittests/client/test_experiment_client.py @@ -0,0 +1,856 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- 
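The new test file beginning here exercises the ExperimentClient reservation lifecycle. The invariant these tests keep returning to: every reserved trial owns a pacemaker (a heartbeat thread), and release() must both free the trial and stop that pacemaker. A minimal sketch of the contract, using only calls that appear in the tests below:

    # client is an ExperimentClient, as built by create_experiment() below.
    trial = client.suggest()                        # trial.status == 'reserved'
    assert client._pacemakers[trial.id].is_alive()  # heartbeat holds the reservation

    client.release(trial)                           # defaults to 'interrupted'
    assert trial.id not in client._pacemakers       # pacemaker stopped and removed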
+"""Example usage and tests for :mod:`orion.client.experiment`.""" +import atexit +from contextlib import contextmanager +import copy +import datetime +import logging + +import pytest + +from orion.client.experiment import ExperimentClient +import orion.core +from orion.core.io.database import DuplicateKeyError +import orion.core.io.experiment_builder as experiment_builder +from orion.core.utils.exceptions import BrokenExperiment, SampleTimeout +from orion.core.utils.tests import OrionState +from orion.core.worker.producer import Producer +from orion.core.worker.trial import Trial +from orion.storage.base import get_storage + + +config = dict( + name='supernaekei', + space={'x': 'uniform(0, 200)'}, + metadata={'user': 'tsirif', + 'orion_version': 'XYZ', + 'VCS': {"type": "git", + "is_dirty": False, + "HEAD_sha": "test", + "active_branch": None, + "diff_sha": "diff"}}, + version=1, + pool_size=1, + max_trials=10, + working_dir='', + algorithms={'random': {'seed': 1}}, + producer={'strategy': 'NoParallelStrategy'}, + refers=dict( + root_id='supernaekei', + parent_id=None, + adapter=[]) + ) + + +def _generate(obj, *args, value): + if obj is None: + return None + + obj = copy.deepcopy(obj) + data = obj + + for arg in args[:-1]: + data = data[arg] + + data[args[-1]] = value + return obj + + +base_trial = { + 'experiment': 0, + 'status': 'new', # new, reserved, suspended, completed, broken + 'worker': None, + 'start_time': None, + 'end_time': None, + 'heartbeat': None, + 'results': [], + 'params': [] +} + + +def generate_trials(trial_config, status): + """Generate Trials with different configurations""" + new_trials = [_generate(trial_config, 'status', value=s) for s in status] + + for i, trial in enumerate(new_trials): + trial['submit_time'] = datetime.datetime.utcnow() + datetime.timedelta(seconds=i) + if trial['status'] != 'new': + trial['start_time'] = datetime.datetime.utcnow() + datetime.timedelta(seconds=i) + + for i, trial in enumerate(new_trials): + if trial['status'] == 'completed': + trial['end_time'] = datetime.datetime.utcnow() + datetime.timedelta(seconds=i) + + # make each trial unique + for i, trial in enumerate(new_trials): + if trial['status'] == 'completed': + trial['results'].append({ + 'name': 'loss', + 'type': 'objective', + 'value': i}) + + trial['params'].append({ + 'name': 'x', + 'type': 'real', + 'value': i + }) + + return new_trials + + +@contextmanager +def create_experiment(exp_config=None, trial_config=None, stati=None): + """Context manager for the creation of an ExperimentClient and storage init""" + if exp_config is None: + exp_config = config + if trial_config is None: + trial_config = base_trial + if stati is None: + stati = ['new', 'interrupted', 'suspended', 'reserved', 'completed'] + + with OrionState(experiments=[exp_config], trials=generate_trials(trial_config, stati)) as cfg: + experiment = experiment_builder.build(name=exp_config['name']) + if cfg.trials: + experiment._id = cfg.trials[0]['experiment'] + client = ExperimentClient(experiment, Producer(experiment)) + yield cfg, experiment, client + + client.close() + + +def compare_trials(trials_a, trials_b): + """Compare two trials by using their configuration""" + def to_dict(trial): + return trial.to_dict() + + assert list(map(to_dict, trials_a)) == list(map(to_dict, trials_b)) + + +def compare_without_heartbeat(trial_a, trial_b): + """Compare trials configuration ommiting heartbeat""" + trial_a_dict = trial_a.to_dict() + trial_b_dict = trial_b.to_dict() + trial_a_dict.pop('heartbeat') + 
trial_b_dict.pop('heartbeat') + assert trial_a_dict == trial_b_dict + + +def test_experiment_fetch_trials(): + """Test compliance of client and experiment `fetch_trials()`""" + with create_experiment() as (cfg, experiment, client): + assert len(experiment.fetch_trials()) == 5 + compare_trials(experiment.fetch_trials(), client.fetch_trials()) + + +def test_experiment_get_trial(): + """Test compliance of client and experiment `get_trial()`""" + with create_experiment() as (cfg, experiment, client): + assert experiment.get_trial(uid=0) == client.get_trial(uid=0) + + +def test_experiment_fetch_trials_by_status(): + """Test compliance of client and experiment `fetch_trials_by_status()`""" + with create_experiment() as (cfg, experiment, client): + compare_trials(experiment.fetch_trials_by_status('completed'), + client.fetch_trials_by_status('completed')) + + +def test_experiment_fetch_non_completed_trials(): + """Test compliance of client and experiment `fetch_noncompleted_trials()`""" + with create_experiment() as (cfg, experiment, client): + compare_trials(experiment.fetch_noncompleted_trials(), client.fetch_noncompleted_trials()) + + +class TestInsert: + """Tests for ExperimentClient.insert""" + + def test_insert_params_wo_results(self): + """Test insertion without results without reservation""" + with create_experiment() as (cfg, experiment, client): + trial = client.insert(dict(x=100)) + assert trial.status == 'interrupted' + assert trial.params['x'] == 100 + assert trial.id in set(trial.id for trial in experiment.fetch_trials()) + compare_without_heartbeat(trial, client.get_trial(uid=trial.id)) + + assert client._pacemakers == {} + + def test_insert_params_with_results(self): + """Test insertion with results without reservation""" + with create_experiment() as (cfg, experiment, client): + timestamp = datetime.datetime.utcnow() + trial = client.insert(dict(x=100), + [dict(name='objective', type='objective', value=101)]) + assert trial.status == 'completed' + assert trial.params['x'] == 100 + assert trial.objective.value == 101 + assert trial.end_time >= timestamp + assert trial.id in set(trial.id for trial in experiment.fetch_trials()) + compare_without_heartbeat(trial, client.get_trial(uid=trial.id)) + assert client.get_trial(uid=trial.id).objective.value == 101 + + assert client._pacemakers == {} + + def test_insert_params_with_results_and_reserve(self): + """Test insertion with results and reservation""" + with create_experiment() as (cfg, experiment, client): + with pytest.raises(ValueError) as exc: + client.insert( + dict(x=100), + [dict(name='objective', type='objective', value=101)], + reserve=True) + + assert 'Cannot observe a trial and reserve it' in str(exc.value) + + def test_insert_existing_params(self): + """Test that duplicated trials cannot be saved in storage""" + with create_experiment() as (cfg, experiment, client): + with pytest.raises(DuplicateKeyError) as exc: + client.insert(dict(x=1)) + + assert ('A trial with params {\'x\': 1} already exist for experiment supernaekei-v1' == + str(exc.value)) + + assert client._pacemakers == {} + + def test_insert_partial_params(self): + """Test that trial with missing dimension that has a default value can be saved""" + config_with_default = copy.deepcopy(config) + config_with_default['space']['y'] = 'uniform(0, 10, default_value=5)' + trial_with_default = copy.deepcopy(base_trial) + trial_with_default['params'].append({'name': 'y', 'type': 'real', 'value': 1}) + with create_experiment(config_with_default, trial_with_default) as (_, 
experiment, client): + trial = client.insert(dict(x=100)) + + assert trial.status == 'interrupted' + assert trial.params['x'] == 100 + assert trial.params['y'] == 5 + assert trial.id in set(trial.id for trial in experiment.fetch_trials()) + compare_without_heartbeat(trial, client.get_trial(uid=trial.id)) + + assert client._pacemakers == {} + + def test_insert_partial_params_missing(self): + """Test that trial with missing dimension cannot be saved""" + config_with_default = copy.deepcopy(config) + config_with_default['space']['y'] = 'uniform(0, 10)' + trial_with_default = copy.deepcopy(base_trial) + trial_with_default['params'].append({'name': 'y', 'type': 'real', 'value': 1}) + with create_experiment(config_with_default, trial_with_default) as (_, _, client): + with pytest.raises(ValueError) as exc: + client.insert(dict(x=1)) + + assert ('Dimension y not specified and does not have a default value.' == + str(exc.value)) + + def test_insert_params_and_reserve(self): + """Test that new trial is reserved properly with `reserve=True`""" + with create_experiment() as (cfg, experiment, client): + trial = client.insert(dict(x=100), reserve=True) + assert trial.status == 'reserved' + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + def test_insert_params_fails_not_reserved(self): + """Test that failed insertion because of duplicated trials will not reserve the original + trial + """ + with create_experiment() as (cfg, experiment, client): + with pytest.raises(DuplicateKeyError): + client.insert(dict(x=1), reserve=True) + + assert client._pacemakers == {} + + def test_insert_bad_params(self): + """Test that bad params cannot be registered in storage""" + with create_experiment() as (cfg, experiment, client): + with pytest.raises(ValueError) as exc: + client.insert(dict(x='bad bad bad')) + + assert ('Dimension x value bad bad bad is outside of prior uniform(0, 200)' == + str(exc.value)) + assert client._pacemakers == {} + + def test_insert_params_bad_results(self): + """Test that results with a wrong format cannot be saved (the trial is registered anyhow)""" + with create_experiment() as (cfg, experiment, client): + with pytest.raises(ValueError) as exc: + client.insert(dict(x=100), [dict(name='objective', type='bad bad bad', value=0)]) + + assert 'Given type, bad bad bad, not one of: ' in str(exc.value) + assert client._pacemakers == {} + + +class TestReserve: + """Tests for ExperimentClient.reserve""" + + def test_reserve(self): + """Test reservation of registered trials""" + with create_experiment() as (cfg, experiment, client): + trial = experiment.get_trial(uid=cfg.trials[1]['_id']) + assert trial.status != 'reserved' + client.reserve(trial) + assert trial.status == 'reserved' + assert experiment.get_trial(trial).status == 'reserved' + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + def test_reserve_dont_exist(self): + """Verify that unregistered trials cannot be reserved.""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(experiment='idontexist', params=cfg.trials[0]['params']) + with pytest.raises(ValueError) as exc: + client.reserve(trial) + + assert 'Trial {} does not exist in database.'.format(trial.id) == str(exc.value) + assert client._pacemakers == {} + + def test_reserve_reserved_locally(self, caplog): + """Verify that a trial cannot be reserved twice locally (warning, no exception)""" + with create_experiment() as (cfg, experiment, client): + trial = 
experiment.get_trial(uid=cfg.trials[1]['_id']) + assert trial.status != 'reserved' + client.reserve(trial) + with caplog.at_level(logging.WARNING): + client.reserve(trial) + + assert 'Trial {} is already reserved.'.format(trial.id) == caplog.records[-1].message + + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + def test_reserve_reserved_remotely(self): + """Verify that a trial cannot be reserved if already reserved by another process""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(**cfg.trials[1]) + assert trial.status == 'interrupted' + client.reserve(trial) + remote_pacemaker = client._pacemakers.pop(trial.id) + assert experiment.get_trial(trial).status == 'reserved' + + trial = Trial(**cfg.trials[1]) + assert trial.status == 'interrupted' + with pytest.raises(RuntimeError) as exc: + client.reserve(trial) + + assert 'Could not reserve trial {}.'.format(trial.id) == str(exc.value) + + assert trial.status == 'interrupted' + assert experiment.get_trial(trial).status == 'reserved' + assert client._pacemakers == {} + remote_pacemaker.stop() + + def test_reserve_race_condition(self): + """Verify that race conditions during `reserve` is detected and raises a comprehensible + error + """ + with create_experiment() as (cfg, experiment, client): + trial = client.get_trial(uid=cfg.trials[0]['_id']) + experiment.set_trial_status(trial, 'reserved') + trial.status = 'new' # Let's pretend it is still available + + with pytest.raises(RuntimeError) as exc: + client.reserve(trial) + + assert 'Could not reserve trial {}.'.format(trial.id) == str(exc.value) + assert client._pacemakers == {} + + +class TestRelease: + """Tests for ExperimentClient.release""" + + def test_release(self): + """Test releasing (to interrupted)""" + with create_experiment() as (cfg, experiment, client): + trial = experiment.get_trial(uid=cfg.trials[1]['_id']) + client.reserve(trial) + pacemaker = client._pacemakers[trial.id] + client.release(trial) + assert trial.status == 'interrupted' + assert experiment.get_trial(trial).status == 'interrupted' + assert trial.id not in client._pacemakers + assert not pacemaker.is_alive() + + def test_release_status(self): + """Test releasing with a specific status""" + with create_experiment() as (cfg, experiment, client): + trial = experiment.get_trial(uid=cfg.trials[1]['_id']) + client.reserve(trial) + pacemaker = client._pacemakers[trial.id] + client.release(trial, 'broken') + assert trial.status == 'broken' + assert experiment.get_trial(trial).status == 'broken' + assert trial.id not in client._pacemakers + assert not pacemaker.is_alive() + + def test_release_invalid_status(self): + """Test releasing with a specific status""" + with create_experiment() as (cfg, experiment, client): + trial = experiment.get_trial(uid=cfg.trials[1]['_id']) + client.reserve(trial) + with pytest.raises(ValueError) as exc: + client.release(trial, 'mouf mouf') + + assert exc.match('Given status `mouf mouf` not one of') + + def test_release_dont_exist(self, monkeypatch): + """Verify that unregistered trials cannot be released""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(experiment='idontexist', params=cfg.trials[1]['params']) + + def do_nada(trial): + """Don't do anything""" + return None + monkeypatch.setattr(client, '_release_reservation', do_nada) + + with pytest.raises(ValueError) as exc: + client.release(trial) + + assert 'Trial {} does not exist in database.'.format(trial.id) == str(exc.value) + assert 
client._pacemakers == {} + + def test_release_race_condition(self): + """Verify that a race condition during `release` is detected and raises a + comprehensible error + """ + with create_experiment() as (cfg, experiment, client): + trial = client.get_trial(uid=cfg.trials[1]['_id']) + client.reserve(trial) + pacemaker = client._pacemakers[trial.id] + # Whoops! Trial got failed over from another process. + experiment.set_trial_status(trial, 'interrupted') + trial.status = 'reserved' # Let's pretend we don't know. + + with pytest.raises(RuntimeError) as exc: + client.release(trial) + + assert ('Reservation for trial {} has been lost before release.'.format(trial.id) + in str(exc.value)) + assert client._pacemakers == {} + assert not pacemaker.is_alive() + + def test_release_unreserved(self): + """Verify that unreserved trials cannot be released""" + with create_experiment() as (cfg, experiment, client): + trial = client.get_trial(uid=cfg.trials[1]['_id']) + with pytest.raises(RuntimeError) as exc: + client.release(trial) + + assert ('Trial {} had no pacemakers. Was is reserved properly?'.format(trial.id) == + str(exc.value)) + + assert client._pacemakers == {} + + +class TestClose: + """Test close method of the client""" + + def test_close_empty(self): + """Test that the client can close when no trial is reserved""" + with create_experiment() as (cfg, experiment, client): + client.close() + + def test_close_with_reserved(self): + """Test that the client cannot be closed if trials are reserved.""" + with create_experiment() as (cfg, experiment, client): + trial = client.suggest() + + with pytest.raises(RuntimeError) as exc: + client.close() + + assert "There is still reserved trials" in str(exc.value) + + client.release(trial) + + def test_close_unregister_atexit(self, monkeypatch): + """Test that close properly unregisters the atexit function""" + def please_dont_call_me(client): + raise RuntimeError("Please don't call me!!!") + + monkeypatch.setattr('orion.client.experiment.set_broken_trials', please_dont_call_me) + + with create_experiment() as (cfg, experiment, client): + # The registered function in atexit is called as expected + with pytest.raises(RuntimeError) as exc: + atexit._run_exitfuncs() + + assert "Please don't call me!!!" 
== str(exc.value) + + # Unregister the function + client.close() + + +class TestBroken: + """Test handling of broken trials with atexit()""" + + def test_broken_trial(self): + """Test that broken trials are detected""" + with create_experiment() as (cfg, experiment, client): + trial = client.suggest() + assert trial.status == 'reserved' + + atexit._run_exitfuncs() + + assert client._pacemakers == {} + assert client.get_trial(trial).status == 'broken' + + def test_atexit_with_multiple_clients(self): + """Test that each client has a separate atexit function""" + config1 = copy.deepcopy(config) + config2 = copy.deepcopy(config) + config2['name'] = 'cloned' + with create_experiment(exp_config=config1) as (_, _, client1): + with create_experiment(exp_config=config2) as (_, _, client2): + trial1 = client1.suggest() + trial2 = client2.suggest() + + assert trial1.status == 'reserved' + assert trial2.status == 'reserved' + + atexit._run_exitfuncs() + + assert client1._pacemakers == {} + assert client2._pacemakers == {} + assert client1.get_trial(trial1).status == 'broken' + assert client2.get_trial(trial2).status == 'broken' + + def test_atexit_with_multiple_clients_unregister(self, monkeypatch): + """Test that each client has a separate atexit function that can be unregistered""" + config1 = copy.deepcopy(config) + config2 = copy.deepcopy(config) + config2['name'] = 'cloned' + with create_experiment(exp_config=config1) as (_, _, client1): + + def please_dont_call_me(client): + raise RuntimeError("Please don't call me!!!") + + monkeypatch.setattr('orion.client.experiment.set_broken_trials', please_dont_call_me) + + with create_experiment(exp_config=config2) as (_, _, client2): + trial1 = client1.suggest() + trial2 = client2.suggest() + + # The registered function in atexit is called as expected + with pytest.raises(RuntimeError) as exc: + atexit._run_exitfuncs() + + assert "Please don't call me!!!" 
== str(exc.value) + + # Unregister the function + client2.release(trial2) + client2.close() + + # It should not be called + atexit._run_exitfuncs() + + assert client1._pacemakers == {} + assert client2._pacemakers == {} + assert client1.get_trial(trial1).status == 'broken' + assert client2.get_trial(trial2).status == 'interrupted' + + def test_interrupted_trial(self): + """Test that interrupted trials are not set to broken""" + with create_experiment() as (cfg, experiment, client): + trial = client.suggest() + assert trial.status == 'reserved' + + try: + raise KeyboardInterrupt + except KeyboardInterrupt: + atexit._run_exitfuncs() + + assert client._pacemakers == {} + assert client.get_trial(trial).status == 'interrupted' + + +class TestSuggest: + """Tests for ExperimentClient.suggest""" + + def test_suggest(self): + """Verify that suggest reserves available trials.""" + with create_experiment() as (cfg, experiment, client): + trial = client.suggest() + assert trial.status == 'reserved' + assert trial.params['x'] == 1 + + assert len(experiment.fetch_trials()) == 5 + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + def test_suggest_new(self): + """Verify that suggest can create, register and reserve new trials.""" + with create_experiment() as (cfg, experiment, client): + for _ in range(3): + trial = client.suggest() + assert trial.status == 'reserved' + assert len(experiment.fetch_trials()) == 5 + assert client._pacemakers[trial.id].is_alive() + client._pacemakers[trial.id].stop() + + trial = client.suggest() + assert trial.status == 'reserved' + assert trial.params['x'] == 57.57 + assert len(experiment.fetch_trials()) == 6 + + assert client._pacemakers[trial.id].is_alive() + for trial_id in list(client._pacemakers.keys()): + client._pacemakers.pop(trial_id).stop() + + def test_suggest_race_condition(self, monkeypatch): + """Verify that race conditions when registering new trials are handled""" + new_value = 50. 
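+ + # Sketch of the scenario (assuming the client retries when a suggested trial + # collides with one already registered in storage): the stub below first + # re-suggests an already existing point, forcing suggest() to query the algo + # a second time, which is why amnesia.count reaches 2.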
+ + # algo will first suggest an already existing trial, then a new one + def amnesia(num=1): + """Suggest an already existing value once, then always suggest the same new one""" + if amnesia.count == 0: + value = [0] + else: + value = [new_value] + + amnesia.count += 1 + + return [value] + + amnesia.count = 0 + + with create_experiment(stati=['completed']) as (cfg, experiment, client): + + monkeypatch.setattr(experiment.algorithms, 'suggest', amnesia) + + assert len(experiment.fetch_trials()) == 1 + + trial = client.suggest() + assert trial.status == 'reserved' + assert trial.params['x'] == new_value + assert amnesia.count == 2 + + assert len(experiment.fetch_trials()) == 2 + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + def test_suggest_algo_opt_out(self, monkeypatch): + """Verify that SampleTimeout is raised when the algo cannot sample new trials (opting out)""" + def opt_out(num=1): + """Never suggest a new trial""" + return None + + monkeypatch.setattr(orion.core.config.worker, 'max_idle_time', 0) + + with create_experiment(stati=['completed']) as (cfg, experiment, client): + + monkeypatch.setattr(experiment.algorithms, 'suggest', opt_out) + + assert len(experiment.fetch_trials()) == 1 + + with pytest.raises(SampleTimeout): + client.suggest() + + def test_suggest_is_done(self): + """Verify that completed experiments cannot suggest new trials""" + with create_experiment(stati=['completed'] * 10) as (cfg, experiment, client): + + assert len(experiment.fetch_trials()) == 10 + assert client.is_done + + assert client.suggest() is None + + def test_suggest_is_broken(self): + """Verify that broken experiments cannot suggest new trials""" + with create_experiment(stati=['broken'] * 10) as (cfg, experiment, client): + + assert len(experiment.fetch_trials()) == 10 + assert client.is_broken + + with pytest.raises(BrokenExperiment): + client.suggest() + + def test_suggest_is_done_race_condition(self, monkeypatch): + """Verify that inability to suggest because is_done becomes True during produce() is + handled. 
+ """ + with create_experiment(stati=['completed'] * 5) as (cfg, experiment, client): + def is_done(self): + """Experiment is done""" + return True + + def set_is_done(): + """Set is_done while algo is trying to suggest""" + monkeypatch.setattr(experiment.__class__, 'is_done', property(is_done)) + + monkeypatch.setattr(client._producer, 'produce', set_is_done) + + assert len(experiment.fetch_trials()) == 5 + assert not client.is_done + + assert client.suggest() is None + + assert len(experiment.fetch_trials()) == 5 + assert client.is_done + + def test_suggest_is_broken_race_condition(self, monkeypatch): + """Verify that experiments that gets broken during local algo.suggest gets properly + handled + """ + with create_experiment(stati=['broken'] * 1) as (cfg, experiment, client): + + def is_broken(self): + """Experiment is broken""" + return True + + def set_is_broken(): + """Set is_broken while algo is trying to suggest""" + monkeypatch.setattr(experiment.__class__, 'is_broken', property(is_broken)) + + monkeypatch.setattr(client._producer, 'produce', set_is_broken) + + assert len(experiment.fetch_trials()) == 1 + assert not client.is_broken + + with pytest.raises(BrokenExperiment): + client.suggest() + + assert len(experiment.fetch_trials()) == 1 + assert client.is_broken + + def test_suggest_hierarchical_space(self): + """Verify that suggest returns trial with proper hierarchical parameter.""" + exp_config = copy.deepcopy(config) + exp_config['space'] = { + 'a': {'x': 'uniform(0, 10, discrete=True)'}, + 'b': {'y': 'loguniform(1e-08, 1)', + 'z': 'choices([\'voici\', \'voila\', 2])'}} + with create_experiment(exp_config=exp_config, stati=[]) as (cfg, experiment, client): + trial = client.suggest() + assert trial.status == 'reserved' + assert len(trial.params) == 2 + assert 'x' in trial.params['a'] + assert 'y' in trial.params['b'] + assert 'z' in trial.params['b'] + + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + +class TestObserve: + """Tests for ExperimentClient.observe""" + + def test_observe(self): + """Verify that `observe()` will update the storage""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(**cfg.trials[1]) + assert trial.results == [] + client.reserve(trial) + assert get_storage().get_trial(trial).objective is None + client.observe(trial, [dict(name='objective', type='objective', value=101)]) + assert get_storage().get_trial(trial).objective.value == 101 + + def test_observe_unreserved(self): + """Verify that `observe()` will fail on non-reserved trials""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(**cfg.trials[1]) + with pytest.raises(RuntimeError) as exc: + client.observe(trial, [dict(name='objective', type='objective', value=101)]) + + assert ('Trial {} had no pacemakers. 
Was is reserved properly?'.format(trial.id) == + str(exc.value)) + + def test_observe_dont_exist(self): + """Verify that `observe()` will fail on non-registered trials""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(experiment='idontexist', params=cfg.trials[0]['params']) + with pytest.raises(ValueError) as exc: + client.observe(trial, [dict(name='objective', type='objective', value=101)]) + + assert 'Trial {} does not exist in database.'.format(trial.id) == str(exc.value) + assert client._pacemakers == {} + + def test_observe_bad_results(self): + """Verify that a bad results type is detected and ValueError is raised""" + with create_experiment() as (cfg, experiment, client): + trial = Trial(**cfg.trials[1]) + client.reserve(trial) + with pytest.raises(ValueError) as exc: + client.observe(trial, [dict(name='objective', type='bad bad bad', value=101)]) + + assert 'Given type, bad bad bad, not one of: ' in str(exc.value) + assert client._pacemakers[trial.id].is_alive() + client._pacemakers.pop(trial.id).stop() + + def test_observe_race_condition(self): + """Verify that a race condition during `observe()` is detected and an error is raised""" + with create_experiment() as (cfg, experiment, client): + trial = client.get_trial(uid=cfg.trials[1]['_id']) + client.reserve(trial) + experiment.set_trial_status(trial, 'interrupted') + trial.status = 'reserved' # Let's pretend it is still reserved + + with pytest.raises(RuntimeError) as exc: + client.observe(trial, [dict(name='objective', type='objective', value=101)]) + + assert 'Reservation for trial {} has been lost.'.format(trial.id) == str(exc.value) + assert client._pacemakers == {} + + +class TestWorkon: + """Tests for ExperimentClient.workon""" + + def test_workon(self): + """Verify that workon executes trials properly""" + def foo(x): + return [dict(name='result', type='objective', value=x * 2)] + + with create_experiment(stati=[]) as (cfg, experiment, client): + client.workon(foo, max_trials=5) + assert len(experiment.fetch_trials()) == 5 + assert client._pacemakers == {} + + def test_workon_partial(self): + """Verify that partial is properly passed to the function""" + def foo(x, y): + return [dict(name='result', type='objective', value=x * 2 + y)] + + with create_experiment(stati=[]) as (cfg, experiment, client): + client.workon(foo, max_trials=10, y=2) + assert len(experiment.fetch_trials()) == 10 + assert client._pacemakers == {} + + def test_workon_partial_with_override(self): + """Verify that partial is overridden by trial.params""" + def foo(x, y): + return [dict(name='result', type='objective', value=x * 2 + y)] + + ext_config = copy.deepcopy(config) + ext_config['space']['y'] = 'uniform(0, 10)' + + with create_experiment(exp_config=ext_config, stati=[]) as (cfg, experiment, client): + default_y = 2 + assert len(experiment.fetch_trials()) == 0 + client.workon(foo, max_trials=1, y=default_y) + assert len(experiment.fetch_trials()) == 1 + assert experiment.fetch_trials()[0].params['y'] != default_y + + def test_workon_hierarchical_partial_with_override(self): + """Verify that hierarchical partial is overridden by trial.params""" + default_y = 2 + default_z = 'voila' + + def foo(a, b): + assert b['y'] != default_y + assert b['z'] == default_z + return [dict(name='result', type='objective', value=a['x'] * 2 + b['y'])] + + ext_config = copy.deepcopy(config) + ext_config['space'] = { + 'a': {'x': 'uniform(0, 10, discrete=True)'}, + 'b': {'y': 'loguniform(1e-08, 1)'}} + + with create_experiment(exp_config=ext_config, stati=[]) as (cfg, 
experiment, client): + assert len(experiment.fetch_trials()) == 0 + client.workon(foo, max_trials=5, b={'y': default_y, 'z': default_z}) + assert len(experiment.fetch_trials()) == 5 + params = experiment.fetch_trials()[0].params + assert len(params) + assert 'x' in params['a'] + assert 'y' in params['b'] diff --git a/tests/unittests/core/cli/test_checks.py b/tests/unittests/core/cli/test_checks.py index 40055f136..b590b390e 100644 --- a/tests/unittests/core/cli/test_checks.py +++ b/tests/unittests/core/cli/test_checks.py @@ -3,11 +3,13 @@ """Collection of tests for :mod:`orion.core.cli.checks`.""" import pytest +import orion.core from orion.core.cli.checks.creation import CreationStage from orion.core.cli.checks.operations import OperationsStage from orion.core.cli.checks.presence import PresenceStage from orion.core.io.database.mongodb import MongoDB -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder +import orion.core.utils.backward as backward from orion.core.utils.exceptions import CheckError @@ -18,15 +20,9 @@ def config(): @pytest.fixture -def builder(): - """Return an ExperimentBuilder instance.""" - return ExperimentBuilder() - - -@pytest.fixture -def presence(builder): +def presence(): """Return a PresenceStage instance.""" - return PresenceStage(builder, []) + return PresenceStage([]) @pytest.fixture @@ -51,24 +47,24 @@ def clean_test(database): def test_check_default_config_pass(monkeypatch, presence, config): """Check if the default config test works.""" - def mock_default_config(): + def mock_default_config(self): return config - monkeypatch.setattr(presence.builder, 'fetch_default_options', mock_default_config) + monkeypatch.setattr(orion.core.config.__class__, 'to_dict', mock_default_config) result, msg = presence.check_default_config() assert result == "Success" assert msg == "" - assert presence.db_config == config['database'] + assert presence.db_config == config['storage']['database'] def test_check_default_config_skip(monkeypatch, presence): """Check if test returns skip if no default config is found.""" - def mock_default_config(): + def mock_default_config(self): return {} - monkeypatch.setattr(presence.builder, 'fetch_default_options', mock_default_config) + monkeypatch.setattr(orion.core.config.__class__, 'to_dict', mock_default_config) result, msg = presence.check_default_config() assert result == "Skipping" @@ -76,48 +72,27 @@ def mock_default_config(): assert presence.db_config == {} -def test_envvar_config_pass(monkeypatch, presence): - """Check if test passes when all environment variables are set.""" - monkeypatch.setenv('ORION_DB_NAME', 'orion') - monkeypatch.setenv('ORION_DB_TYPE', 'mongodb') - monkeypatch.setenv('ORION_DB_ADDRESS', 'localhost') - - result, msg = presence.check_environment_vars() - - assert result == "Success" - assert msg == "" - assert presence.db_config == {'name': 'orion', 'type': 'mongodb', 'host': 'localhost'} - - -def test_envvar_config_skip(monkeypatch, presence): - """Check if test skips when there is no environment variable.""" - result, msg = presence.check_environment_vars() - - assert result == "Skipping" - assert 'No' in msg - assert presence.db_config == {} - - def test_config_file_config_pass(monkeypatch, presence, config): """Check if test passes with valid configuration.""" - def mock_file_config(self): + def mock_file_config(cmdargs): + backward.update_db_config(config) return config - monkeypatch.setattr(presence.builder, "fetch_file_config", 
mock_file_config) + monkeypatch.setattr(experiment_builder, "get_cmd_config", mock_file_config) result, msg = presence.check_configuration_file() assert result == "Success" assert msg == "" - assert presence.db_config == config['database'] + assert presence.db_config == config['storage']['database'] def test_config_file_fails_missing_config(monkeypatch, presence, config): """Check if test fails with missing configuration.""" - def mock_file_config(self): + def mock_file_config(cmdargs): return {} - monkeypatch.setattr(presence.builder, "fetch_file_config", mock_file_config) + monkeypatch.setattr(experiment_builder, "get_cmd_config", mock_file_config) status, msg = presence.check_configuration_file() @@ -128,10 +103,10 @@ def mock_file_config(self): def test_config_file_fails_missing_database(monkeypatch, presence, config): """Check if test fails with missing database configuration.""" - def mock_file_config(self): + def mock_file_config(cmdargs): return {'algorithm': 'asha'} - monkeypatch.setattr(presence.builder, "fetch_file_config", mock_file_config) + monkeypatch.setattr(experiment_builder, "get_cmd_config", mock_file_config) status, msg = presence.check_configuration_file() @@ -142,10 +117,10 @@ def mock_file_config(self): def test_config_file_fails_missing_value(monkeypatch, presence, config): """Check if test fails with missing value in database configuration.""" - def mock_file_config(self): - return {'database': {}} + def mock_file_config(cmdargs): + return {'storage': {'database': {}}} - monkeypatch.setattr(presence.builder, "fetch_file_config", mock_file_config) + monkeypatch.setattr(experiment_builder, "get_cmd_config", mock_file_config) status, msg = presence.check_configuration_file() @@ -160,7 +135,7 @@ def mock_file_config(self): return {} presence.db_config = config['database'] - monkeypatch.setattr(presence.builder, "fetch_file_config", mock_file_config) + monkeypatch.setattr(experiment_builder, "get_cmd_config", mock_file_config) result, msg = presence.check_configuration_file() diff --git a/tests/unittests/core/cli/test_evc.py b/tests/unittests/core/cli/test_evc.py index 724c33c59..af0d7a25e 100644 --- a/tests/unittests/core/cli/test_evc.py +++ b/tests/unittests/core/cli/test_evc.py @@ -17,7 +17,8 @@ def test_defined_parser(): options = parser.parse_args([]) assert options.manual_resolution is False assert options.algorithm_change is False - assert options.branch is None + assert options.branch_to is None + assert options.branch_from is None assert options.cli_change_type is None assert options.code_change_type is None assert options.config_change_type is None diff --git a/tests/unittests/core/cli/test_info.py b/tests/unittests/core/cli/test_info.py index 92a80c8d0..c64448f51 100755 --- a/tests/unittests/core/cli/test_info.py +++ b/tests/unittests/core/cli/test_info.py @@ -5,11 +5,11 @@ import pytest -from orion.core.cli.info import ( +from orion.core.io.space_builder import SpaceBuilder +from orion.core.utils.format_terminal import ( format_algorithm, format_commandline, format_config, format_dict, format_identification, format_info, format_list, format_metadata, format_refers, format_space, format_stats, format_title, get_trial_params) -from orion.core.io.space_builder import SpaceBuilder from orion.core.worker.trial import Trial @@ -23,7 +23,7 @@ class DummyExperiment(): def dummy_trial(): """Return a dummy trial object""" trial = Trial() - trial.params = [ + trial._params = [ Trial.Param(name='a', type='real', value=0.0), Trial.Param(name='b', type='integer', 
value=1), Trial.Param(name='c', type='categorical', value='Some')] @@ -422,15 +422,15 @@ def test_format_algorithm(algorithm_dict): def test_format_space(): """Test space section formatting""" experiment = DummyExperiment() - commandline = ['executing.sh', '--some~choices(["random", "or", "not"])', - '--command~uniform(0, 1)'] - space = SpaceBuilder().build_from(commandline) + space = SpaceBuilder().build( + {"some": 'choices(["random", "or", "not"])', + "command": 'uniform(0, 1)'}) experiment.space = space assert format_space(experiment) == """\ Space ===== -/command: uniform(0, 1) -/some: choices(['random', 'or', 'not']) +command: uniform(0, 1) +some: choices(['random', 'or', 'not']) """ @@ -576,7 +576,9 @@ def test_format_info(algorithm_dict, dummy_trial): experiment.max_trials = 100 experiment.configuration = {'algorithms': algorithm_dict} - space = SpaceBuilder().build_from(commandline) + space = SpaceBuilder().build( + {"some": 'choices(["random", "or", "not"])', + "command": 'uniform(0, 1)'}) experiment.space = space experiment.metadata.update(dict( user='user', @@ -648,8 +650,8 @@ def test_format_info(algorithm_dict, dummy_trial): Space ===== -/command: uniform(0, 1) -/some: choices(['random', 'or', 'not']) +command: uniform(0, 1) +some: choices(['random', 'or', 'not']) Meta-data diff --git a/tests/unittests/core/conftest.py b/tests/unittests/core/conftest.py index dba484bd7..ea40bfcf5 100644 --- a/tests/unittests/core/conftest.py +++ b/tests/unittests/core/conftest.py @@ -11,10 +11,10 @@ from orion.algo.space import (Categorical, Integer, Real, Space) from orion.core.evc import conflicts from orion.core.io.convert import (JSONConverter, YAMLConverter) +import orion.core.io.experiment_builder as experiment_builder from orion.core.io.space_builder import DimensionBuilder import orion.core.utils.backward as backward from orion.core.utils.tests import default_datetime, MockDatetime -from orion.core.worker.experiment import Experiment TEST_DIR = os.path.dirname(os.path.abspath(__file__)) YAML_SAMPLE = os.path.join(TEST_DIR, 'sample_config.yml') @@ -110,6 +110,20 @@ def space(): return space +@pytest.fixture(scope='module') +def hierarchical_space(): + """Construct a space with hierarchical Dimensions.""" + space = Space() + categories = {'asdfa': 0.1, 2: 0.2, 3: 0.3, 4: 0.4} + dim = Categorical('yolo.first', categories, shape=2) + space.register(dim) + dim = Integer('yolo.second', 'uniform', -3, 6) + space.register(dim) + dim = Real('yoloflat', 'alpha', 0.9) + space.register(dim) + return space + + @pytest.fixture(scope='module') def fixed_suggestion(): """Return the same tuple/sample from a possible space.""" @@ -146,7 +160,7 @@ def hacked_exp(with_user_dendi, random_dt, clean_db, create_db_instance): """Return an `Experiment` instance with hacked _id to find trials in fake database. 
""" - exp = Experiment('supernaedo2-dendi') + exp = experiment_builder.build(name='supernaedo2-dendi') exp._id = 'supernaedo2-dendi' # white box hack return exp @@ -196,17 +210,18 @@ def refers_id_substitution(with_user_tsirif, random_dt, clean_db, create_db_inst @pytest.fixture def new_config(): """Generate a new experiment configuration""" + user_script = 'abs_path/black_box.py' config = dict( name='test', algorithms='fancy', version=1, metadata={'VCS': 'to be changed', - 'user_script': 'abs_path/black_box.py', - 'user_args': - ['--new~normal(0,2)', '--changed~normal(0,2)'], + 'user_script': user_script, + 'user_args': [ + user_script, '--new~normal(0,2)', '--changed~normal(0,2)'], 'user': 'some_user_name'}) - backward.populate_priors(config['metadata']) + backward.populate_space(config) return config @@ -214,6 +229,7 @@ def new_config(): @pytest.fixture def old_config(create_db_instance): """Generate an old experiment configuration""" + user_script = 'abs_path/black_box.py' config = dict( name='test', algorithms='random', @@ -224,12 +240,12 @@ def old_config(create_db_instance): "active_branch": None, "diff_sha": "diff", }, - 'user_script': 'abs_path/black_box.py', - 'user_args': - ['--missing~uniform(-10,10)', '--changed~uniform(-10,10)'], + 'user_script': user_script, + 'user_args': [ + user_script, '--missing~uniform(-10,10)', '--changed~uniform(-10,10)'], 'user': 'some_user_name'}) - backward.populate_priors(config['metadata']) + backward.populate_space(config) create_db_instance.write('experiments', config) return config @@ -309,7 +325,7 @@ def cli_conflict(old_config, new_config): new_config = copy.deepcopy(new_config) new_config['metadata']['user_args'].append("--some-new=args") new_config['metadata']['user_args'].append("--bool-arg") - backward.populate_priors(new_config['metadata']) + backward.populate_space(new_config) return conflicts.CommandLineConflict(old_config, new_config) @@ -335,7 +351,7 @@ def bad_exp_parent_config(): version=1, algorithms='random') - backward.populate_priors(config['metadata']) + backward.populate_space(config) return config diff --git a/tests/unittests/core/evc/conftest.py b/tests/unittests/core/evc/conftest.py index bebccab6b..9d814035b 100644 --- a/tests/unittests/core/evc/conftest.py +++ b/tests/unittests/core/evc/conftest.py @@ -55,5 +55,5 @@ def exp_w_child_as_parent_conflict(create_db_instance, parent_config, child_conf def existing_exp_conflict(create_db_instance, parent_config): """Generate an experiment name conflict""" create_db_instance.write('experiments', parent_config) - create_db_instance.write('experiments', {'name': 'dummy', 'metadata': {'user': 'corneauf'}}) + create_db_instance.write('experiments', {'name': 'dummy', 'version': 1}) return conflicts.ExperimentNameConflict(parent_config, parent_config) diff --git a/tests/unittests/core/evc/test_adapters.py b/tests/unittests/core/evc/test_adapters.py index a2d1acb2d..90f48fa39 100644 --- a/tests/unittests/core/evc/test_adapters.py +++ b/tests/unittests/core/evc/test_adapters.py @@ -290,9 +290,9 @@ def test_dimension_addition_forward(self, trials): adapted_trials = dimension_addition_adapter.forward(trials) - assert adapted_trials[0].params[-1] == new_param - assert adapted_trials[4].params[-1] == new_param - assert adapted_trials[-1].params[-1] == new_param + assert adapted_trials[0]._params[-1] == new_param + assert adapted_trials[4]._params[-1] == new_param + assert adapted_trials[-1]._params[-1] == new_param def test_dimension_addition_forward_already_existing(self, trials): 
"""Test :meth:`orion.core.evc.adapters.DimensionAddition.forward` @@ -316,32 +316,32 @@ def test_dimension_addition_backward(self, dummy_param, trials): for trial in trials: random_param = new_param.to_dict() random_param['value'] = sampler.sample() - trial.params.append(Trial.Param(**random_param)) + trial._params.append(Trial.Param(**random_param)) adapted_trials = dimension_addition_adapter.backward(trials) assert len(adapted_trials) == 0 - trials[0].params[-1].value = 1 - assert trials[0].params[-1] == new_param + trials[0]._params[-1].value = 1 + assert trials[0]._params[-1] == new_param adapted_trials = dimension_addition_adapter.backward(trials) assert len(adapted_trials) == 1 - trials[4].params[-1].value = 1 - assert trials[4].params[-1] == new_param + trials[4]._params[-1].value = 1 + assert trials[4]._params[-1] == new_param adapted_trials = dimension_addition_adapter.backward(trials) assert len(adapted_trials) == 2 - trials[-1].params[-1].value = 1 - assert trials[-1].params[-1] == new_param + trials[-1]._params[-1].value = 1 + assert trials[-1]._params[-1] == new_param adapted_trials = dimension_addition_adapter.backward(trials) assert len(adapted_trials) == 3 - assert new_param not in (adapted_trials[0].params) - assert new_param not in (adapted_trials[1].params) - assert new_param not in (adapted_trials[2].params) + assert new_param not in (adapted_trials[0]._params) + assert new_param not in (adapted_trials[1]._params) + assert new_param not in (adapted_trials[2]._params) def test_dimension_addition_backward_not_existing(self, trials): """Test :meth:`orion.core.evc.adapters.DimensionAddition.backward` @@ -371,32 +371,32 @@ def test_dimension_deletion_forward(self, trials): for trial in trials: random_param = new_param.to_dict() random_param['value'] = sampler.sample() - trial.params.append(Trial.Param(**random_param)) + trial._params.append(Trial.Param(**random_param)) adapted_trials = dimension_deletion_adapter.forward(trials) assert len(adapted_trials) == 0 - trials[0].params[-1].value = 1 - assert trials[0].params[-1] == new_param + trials[0]._params[-1].value = 1 + assert trials[0]._params[-1] == new_param adapted_trials = dimension_deletion_adapter.forward(trials) assert len(adapted_trials) == 1 - trials[4].params[-1].value = 1 - assert trials[4].params[-1] == new_param + trials[4]._params[-1].value = 1 + assert trials[4]._params[-1] == new_param adapted_trials = dimension_deletion_adapter.forward(trials) assert len(adapted_trials) == 2 - trials[-1].params[-1].value = 1 - assert trials[-1].params[-1] == new_param + trials[-1]._params[-1].value = 1 + assert trials[-1]._params[-1] == new_param adapted_trials = dimension_deletion_adapter.forward(trials) assert len(adapted_trials) == 3 - assert new_param not in (adapted_trials[0].params) - assert new_param not in (adapted_trials[1].params) - assert new_param not in (adapted_trials[2].params) + assert new_param not in (adapted_trials[0]._params) + assert new_param not in (adapted_trials[1]._params) + assert new_param not in (adapted_trials[2]._params) def test_dimension_deletion_forward_not_existing(self, trials): """Test :meth:`orion.core.evc.adapters.DimensionDeletion.forward` @@ -418,9 +418,9 @@ def test_dimension_deletion_backward(self, dummy_param, trials): adapted_trials = dimension_deletion_adapter.backward(trials) - assert adapted_trials[0].params[-1] == new_param - assert adapted_trials[4].params[-1] == new_param - assert adapted_trials[-1].params[-1] == new_param + assert adapted_trials[0]._params[-1] == new_param 
+ assert adapted_trials[4]._params[-1] == new_param + assert adapted_trials[-1]._params[-1] == new_param def test_dimension_deletion_backward_already_existing(self, trials): """Test :meth:`orion.core.evc.adapters.DimensionDeletion.backward` @@ -504,11 +504,11 @@ def test_dimension_renaming_forward(self, trials): assert len(adapted_trials) == len(trials) - assert new_name in [param.name for param in adapted_trials[0].params] - assert old_name not in [param.name for param in adapted_trials[0].params] + assert new_name in [param.name for param in adapted_trials[0]._params] + assert old_name not in [param.name for param in adapted_trials[0]._params] - assert new_name in [param.name for param in adapted_trials[-1].params] - assert old_name not in [param.name for param in adapted_trials[-1].params] + assert new_name in [param.name for param in adapted_trials[-1]._params] + assert old_name not in [param.name for param in adapted_trials[-1]._params] def test_dimension_renaming_forward_incompatible(self, trials): """Test :meth:`orion.core.evc.adapters.DimensionRenaming.forward` @@ -536,11 +536,11 @@ def test_dimension_renaming_backward(self, trials): assert len(adapted_trials) == len(trials) - assert old_name in [param.name for param in adapted_trials[0].params] - assert new_name not in [param.name for param in adapted_trials[0].params] + assert old_name in [param.name for param in adapted_trials[0]._params] + assert new_name not in [param.name for param in adapted_trials[0]._params] - assert old_name in [param.name for param in adapted_trials[-1].params] - assert new_name not in [param.name for param in adapted_trials[-1].params] + assert old_name in [param.name for param in adapted_trials[-1]._params] + assert new_name not in [param.name for param in adapted_trials[-1]._params] def test_dimension_renaming_backward_incompatible(self, trials): """Test :meth:`orion.core.evc.adapters.DimensionRenaming.backward` @@ -672,9 +672,9 @@ def test_composite_adapter_forward(self, dummy_param, trials): adapted_trials = composite_adapter.forward(trials) - assert adapted_trials[0].params[-1] == new_param - assert adapted_trials[4].params[-1] == new_param - assert adapted_trials[-1].params[-1] == new_param + assert adapted_trials[0]._params[-1] == new_param + assert adapted_trials[4]._params[-1] == new_param + assert adapted_trials[-1]._params[-1] == new_param composite_adapter = CompositeAdapter(dimension_addition_adapter, dimension_deletion_adapter) @@ -682,9 +682,9 @@ def test_composite_adapter_forward(self, dummy_param, trials): assert len(adapted_trials) == len(trials) - assert new_param not in (adapted_trials[0].params) - assert new_param not in (adapted_trials[4].params) - assert new_param not in (adapted_trials[-1].params) + assert new_param not in (adapted_trials[0]._params) + assert new_param not in (adapted_trials[4]._params) + assert new_param not in (adapted_trials[-1]._params) def test_composite_adapter_backward(self, dummy_param, trials): """Test :meth:`orion.core.evc.adapters.CompositeAdapter.backward` with two adapters""" @@ -697,9 +697,9 @@ def test_composite_adapter_backward(self, dummy_param, trials): adapted_trials = composite_adapter.backward(trials) - assert adapted_trials[0].params[-1] == new_param - assert adapted_trials[4].params[-1] == new_param - assert adapted_trials[-1].params[-1] == new_param + assert adapted_trials[0]._params[-1] == new_param + assert adapted_trials[4]._params[-1] == new_param + assert adapted_trials[-1]._params[-1] == new_param composite_adapter = 
CompositeAdapter(dimension_addition_adapter, dimension_deletion_adapter) @@ -707,9 +707,9 @@ def test_composite_adapter_backward(self, dummy_param, trials): assert len(adapted_trials) == len(trials) - assert new_param not in (adapted_trials[0].params) - assert new_param not in (adapted_trials[4].params) - assert new_param not in (adapted_trials[-1].params) + assert new_param not in (adapted_trials[0]._params) + assert new_param not in (adapted_trials[4]._params) + assert new_param not in (adapted_trials[-1]._params) def test_dimension_addition_configuration(dummy_param): diff --git a/tests/unittests/core/evc/test_conflicts.py b/tests/unittests/core/evc/test_conflicts.py index 92491aa5c..aff4d6f40 100644 --- a/tests/unittests/core/evc/test_conflicts.py +++ b/tests/unittests/core/evc/test_conflicts.py @@ -285,7 +285,9 @@ def test_try_resolve_bad_cli(self, cli_conflict): def test_repr(self, cli_conflict): """Verify the representation of conflict for user interface""" assert ( - repr(cli_conflict) == "Old arguments '' != new arguments 'bool-arg True some-new args'") + repr(cli_conflict) == ( + "Old arguments '_pos_0 abs_path/black_box.py' != " + "new arguments '_pos_0 abs_path/black_box.py bool-arg True some-new args'")) class TestScriptConfigConflict(object): @@ -328,8 +330,8 @@ def test_comparison(self, yaml_config, yaml_diff_config): old_config = {'metadata': {'user_args': yaml_config}} new_config = {'metadata': {'user_args': yaml_diff_config}} - backward.populate_priors(old_config['metadata']) - backward.populate_priors(new_config['metadata']) + backward.populate_space(old_config) + backward.populate_space(new_config) conflicts = list(conflict.ScriptConfigConflict.detect(old_config, new_config)) assert len(conflicts) == 1 @@ -339,8 +341,8 @@ def test_comparison_idem(self, yaml_config): old_config = {'metadata': {'user_args': yaml_config}} new_config = {'metadata': {'user_args': yaml_config + ['--other', 'args']}} - backward.populate_priors(old_config['metadata']) - backward.populate_priors(new_config['metadata']) + backward.populate_space(old_config) + backward.populate_space(new_config) assert list(conflict.ScriptConfigConflict.detect(old_config, new_config)) == [] @@ -418,7 +420,7 @@ def test_conflict_exp_renamed(self, exp_w_child_conflict): def test_repr(self, experiment_name_conflict): """Verify the representation of conflict for user interface""" assert (repr(experiment_name_conflict) == - "Experiment name 'test' already exist for user 'some_user_name'") + "Experiment name 'test' already exist with version '1'") class TestConflicts(object): diff --git a/tests/unittests/core/evc/test_experiment_tree.py b/tests/unittests/core/evc/test_experiment_tree.py index 116a67094..b07d9f4b6 100644 --- a/tests/unittests/core/evc/test_experiment_tree.py +++ b/tests/unittests/core/evc/test_experiment_tree.py @@ -232,12 +232,12 @@ def test_renaming_forward(create_db_instance): assert len(parent_trials) == 6 assert len(exp_node.item.fetch_trials(query)) == 4 - assert all((trial.params[0].name == "/encoding_layer") for trial in parent_trials) + assert all((trial._params[0].name == "/encoding_layer") for trial in parent_trials) adapter = experiment.refers['adapter'] adapted_parent_trials = adapter.forward(parent_trials) assert len(adapted_parent_trials) == 6 - assert all((trial.params[0].name == "/encoding") for trial in adapted_parent_trials) + assert all((trial._params[0].name == "/encoding") for trial in adapted_parent_trials) assert len(experiment.fetch_trials_tree(query)) == 6 + 4 @@ -268,12 +268,12 
@@ def test_renaming_backward(create_db_instance): assert len(children_trials) == 4 assert len(exp_node.item.fetch_trials(query)) == 6 - assert all((trial.params[0].name == "/encoding") for trial in children_trials) + assert all((trial._params[0].name == "/encoding") for trial in children_trials) adapter = exp_node.children[0].item.refers['adapter'] adapted_children_trials = adapter.backward(children_trials) assert len(adapted_children_trials) == 4 - assert all((trial.params[0].name == "/encoding_layer") for trial in adapted_children_trials) + assert all((trial._params[0].name == "/encoding_layer") for trial in adapted_children_trials) assert len(experiment.fetch_trials_tree(query)) == 6 + 4 diff --git a/tests/unittests/core/evc/test_resolutions.py b/tests/unittests/core/evc/test_resolutions.py index acf3088dd..77796cfd7 100644 --- a/tests/unittests/core/evc/test_resolutions.py +++ b/tests/unittests/core/evc/test_resolutions.py @@ -379,7 +379,7 @@ def test_adapters(self, experiment_name_resolution): def test_repr(self, experiment_name_resolution): """Verify resolution representation for user interface""" - assert repr(experiment_name_resolution) == '--branch new-exp-name' + assert repr(experiment_name_resolution) == '--branch-to new-exp-name' def test_revert(self, old_config, new_config, experiment_name_conflict, experiment_name_resolution): diff --git a/tests/unittests/core/io/interactive_commands/test_branching_prompt.py b/tests/unittests/core/io/interactive_commands/test_branching_prompt.py index 827b47ee1..e5d113ca2 100644 --- a/tests/unittests/core/io/interactive_commands/test_branching_prompt.py +++ b/tests/unittests/core/io/interactive_commands/test_branching_prompt.py @@ -437,7 +437,7 @@ def test_set_experiment_bad_name(self, capsys, conflicts, branch_solver_prompt): assert len(conflicts.get_resolved()) == 0 branch_solver_prompt.do_name("test") out, err = capsys.readouterr() - assert "Experiment name 'test' already exist for user" in out + assert "Experiment name 'test' already exist for version '1'" in out assert len(conflicts.get_resolved()) == 0 def test_set_experiment_name_twice(self, capsys, conflicts, branch_solver_prompt): diff --git a/tests/unittests/core/io/orion_config.yaml b/tests/unittests/core/io/orion_config.yaml index e34d42505..1a5df2129 100644 --- a/tests/unittests/core/io/orion_config.yaml +++ b/tests/unittests/core/io/orion_config.yaml @@ -1,11 +1,10 @@ -name: voila_voici +experiment: + name: voila_voici + pool_size: 1 + max_trials: 100 -pool_size: 1 -max_trials: 100 + algorithms: 'random' -algorithms: 'random' - -producer: strategy: NoParallelStrategy database: diff --git a/tests/unittests/core/io/test_cmdline_parser.py b/tests/unittests/core/io/test_cmdline_parser.py index 46728f745..525c9135f 100644 --- a/tests/unittests/core/io/test_cmdline_parser.py +++ b/tests/unittests/core/io/test_cmdline_parser.py @@ -28,6 +28,25 @@ def basic_config(): return config +@pytest.fixture +def basic_keys(): + """Return keys of a simple configuration""" + keys = OrderedDict() + + keys['_pos_0'] = '_pos_0' + keys['_pos_1'] = '_pos_1' + keys['_pos_2'] = '_pos_2' + keys['_pos_3'] = '_pos_3' + keys['_pos_4'] = '_pos_4' + keys['with'] = '--with' + keys['and'] = '--and' + keys['plus'] = '--plus' + keys['booleans'] = '--booleans' + keys['equal'] = '--equal' + + return keys + + @pytest.fixture def to_format(): """Return a commandline to format""" @@ -56,14 +75,15 @@ def test_parse_paths(monkeypatch): assert parsed_values[3] == os.path.abspath(__file__) -def 
test_parse_arguments(basic_config): +def test_parse_arguments(basic_config, basic_keys): """Test the parsing of the commandline arguments""" cmdline_parser = CmdlineParser() - arguments = cmdline_parser._parse_arguments( + keys, arguments = cmdline_parser._parse_arguments( "python script.py some pos args --with args --and multiple args " "--plus --booleans --equal=value".split(" ")) assert arguments == basic_config + assert keys == basic_keys def test_parse_arguments_template(): @@ -86,7 +106,7 @@ def test_format(to_format): cmdline_parser.parse(to_format.split(' ')) - arguments = cmdline_parser._parse_arguments(to_format.split(' ')) + keys, arguments = cmdline_parser._parse_arguments(to_format.split(' ')) formatted = cmdline_parser.format(arguments) @@ -129,6 +149,7 @@ def test_get_state_dict(): "--with args --and multiple args --plus --booleans --equal=value".split(" ")) assert cmdline_parser.get_state_dict() == { + 'keys': list(map(list, cmdline_parser.keys.items())), 'arguments': list(map(list, cmdline_parser.arguments.items())), 'template': cmdline_parser.template} @@ -143,3 +164,42 @@ def test_set_state_dict(): assert cmdline_parser.format({'_pos_0': 'voici', '_pos_1': 'voila', 'with': 'classe'}) == [ 'voici', 'voila', '--with', 'classe', '--plus', '--booleans'] + + +def test_parse_not_enough_dashes(): + """Test that arguments with many chars but one dash are supported even if it is not standard""" + cmdline_parser = CmdlineParser() + keys, arguments = cmdline_parser._parse_arguments( + "pos -not-enough dashes " + "--enough dashes -o my".split(" ")) + + assert arguments == OrderedDict(( + ('_pos_0', 'pos'), + ('not-enough', 'dashes'), + ('enough', 'dashes'), + ('o', 'my'))) + + assert keys == OrderedDict(( + ('_pos_0', '_pos_0'), + ('not-enough', '-not-enough'), + ('enough', '--enough'), + ('o', '-o'))) + + +def test_parse_fugly_underscores(): + """Test that underscores are kept as such no matter how fugly this is""" + cmdline_parser = CmdlineParser() + keys, arguments = cmdline_parser._parse_arguments( + "pos -my_poor eyes --are_bleeding because --of-these underscores".split(" ")) + + assert arguments == OrderedDict(( + ('_pos_0', 'pos'), + ('my_poor', 'eyes'), + ('are_bleeding', 'because'), + ('of-these', 'underscores'))) + + assert keys == OrderedDict(( + ('_pos_0', '_pos_0'), + ('my_poor', '-my_poor'), + ('are_bleeding', '--are_bleeding'), + ('of-these', '--of-these'))) diff --git a/tests/unittests/core/io/test_config.py b/tests/unittests/core/io/test_config.py index b1b93b16e..4a26e8f57 100644 --- a/tests/unittests/core/io/test_config.py +++ b/tests/unittests/core/io/test_config.py @@ -2,6 +2,8 @@ # -*- coding: utf-8 -*- """Example usage and tests for :mod:`orion.core.io.config`.""" +import argparse +import logging import os import pytest @@ -239,3 +241,284 @@ def test_local_precedence(yaml_path): assert config.test == "comme_ci_comme_ca" del os.environ['TOP_SECRET_MESSAGE'] + + +def test_overwrite_subconfig(): + """Test that subconfig cannot be overwritten""" + config = Configuration() + config.nested = Configuration() + with pytest.raises(ValueError) as exc: + config.add_option('nested', option_type=str) + assert "Configuration already contains nested" == str(exc.value) + + with pytest.raises(ValueError) as exc: + config.nested = Configuration() + assert "Configuration already contains subconfiguration nested" == str(exc.value) + + +def test_to_dict(): + """Test dictionary representation of the configuration""" + config = Configuration() + config.add_option('test', 
option_type=str, default="voici_voila") + config.nested = Configuration() + config.nested.add_option('test2', option_type=str, default="zici") + + assert config.to_dict() == { + 'test': 'voici_voila', + 'nested': { + 'test2': 'zici'}} + + config.test = 'hello' + config.nested.test2 = 'labas' + + assert config.to_dict() == { + 'test': 'hello', + 'nested': { + 'test2': 'labas'}} + + +def test_key_curation(): + """Test that both - and _ maps to same options""" + config = Configuration() + config.add_option('test-with-dashes', option_type=int, default=1) + config.add_option('test_with_underscores', option_type=int, default=2) + config.add_option('test-all_mixedup', option_type=int, default=3) + + assert config['test-with-dashes'] == 1 + assert config['test_with_underscores'] == 2 + assert config['test-all_mixedup'] == 3 + + assert config['test_with_dashes'] == 1 + assert config['test-with-underscores'] == 2 + assert config['test_all-mixedup'] == 3 + + assert config.test_with_dashes == 1 + assert config.test_with_underscores == 2 + assert config.test_all_mixedup == 3 + + config['test_with_dashes'] = 4 + assert config['test-with-dashes'] == 4 + + +def test_nested_key_curation(): + """Test that both - and _ maps to same options in nested configs as well""" + config = Configuration() + config.add_option('test-with-dashes', option_type=str, default="voici_voila") + config.nested = Configuration() + config.nested.add_option('test_with_underscores', option_type=str, default="zici") + + assert config['nested']['test_with_underscores'] == 'zici' + assert config['nested']['test-with-underscores'] == 'zici' + + config['nested']['test-with-underscores'] = 'labas' + assert config.nested.test_with_underscores == 'labas' + + +def test_help_option(): + """Verify adding documentation to options.""" + config = Configuration() + config.add_option('option', option_type=str, help='A useless option!') + + assert config.help('option') == 'A useless option!' + + +def test_help_nested_option(): + """Verify adding documentation to a nested option.""" + config = Configuration() + config.add_option('option', option_type=str, help='A useless option!') + config.nested = Configuration() + config.nested.add_option('option', option_type=str, help='A useless nested option!') + + assert config.help('nested.option') == 'A useless nested option!' + assert config.nested.help('option') == 'A useless nested option!' + + +def test_help_option_with_default(): + """Verify adding documentation to options with default value.""" + config = Configuration() + config.add_option('option', option_type=str, default='a', help='A useless option!') + + assert config.help('option') == 'A useless option! 
(default: a)' + + +def test_no_help_option(): + """Verify not adding documentation to options.""" + config = Configuration() + config.add_option('option', option_type=str) + + assert config.help('option') == 'Undocumented' + + +def test_argument_parser(): + """Verify the argument parser built based on config.""" + config = Configuration() + config.add_option('option', option_type=str) + + parser = argparse.ArgumentParser() + config.add_arguments(parser) + + options = parser.parse_args(['--option', 'a']) + + assert options.option == 'a' + + +def test_argument_parser_ignore_default(): + """Verify the argument parser does not get default values.""" + config = Configuration() + config.add_option('option', option_type=str, default='b') + + parser = argparse.ArgumentParser() + config.add_arguments(parser) + + options = parser.parse_args([]) + + assert options.option is None + + +def test_argument_parser_rename(): + """Verify the argument parser built based on config with some options renamed.""" + config = Configuration() + config.add_option('option', option_type=str) + + parser = argparse.ArgumentParser() + config.add_arguments(parser, rename=dict(option='--new-option')) + + with pytest.raises(SystemExit) as exc: + options = parser.parse_args(['--option', 'a']) + + assert exc.match('2') + + options = parser.parse_args(['--new-option', 'a']) + + assert options.new_option == 'a' + + +def test_argument_parser_dict_list_tuple(): + """Verify the argument parser does not contain options of type dict/list/tuple in config.""" + config = Configuration() + config.add_option('st', option_type=str) + config.add_option('di', option_type=dict) + config.add_option('li', option_type=list) + config.add_option('tu', option_type=tuple) + + parser = argparse.ArgumentParser() + config.add_arguments(parser) + + options = parser.parse_args([]) + assert vars(options) == {'st': None} + + with pytest.raises(SystemExit) as exc: + options = parser.parse_args(['--di', 'ct']) + + assert exc.match('2') + + +def test_deprecate_option(caplog): + """Test deprecating an option.""" + config = Configuration() + config.add_option( + 'option', option_type=str, default='hello', + deprecate=dict(version='v1.0', alternative='None! T_T')) + + config.add_option( + 'ok', option_type=str, default='hillo') + + # Access the deprecated option and trigger a warning. + with caplog.at_level(logging.WARNING, logger="orion.core.io.config"): + assert config.option == 'hello' + + assert caplog.record_tuples == [( + 'orion.core.io.config', logging.WARNING, + '(DEPRECATED) Option `option` will be removed in v1.0. Use `None! T_T` instead.')] + + caplog.clear() + + # Access the non-deprecated option and trigger no warnings. + with caplog.at_level(logging.WARNING, logger="orion.core.io.config"): + assert config.ok == 'hillo' + + assert caplog.record_tuples == [] + + +def test_deprecate_option_missing_version(): + """Verify option deprecation if version is missing.""" + config = Configuration() + with pytest.raises(ValueError) as exc: + config.add_option( + 'option', option_type=str, + deprecate=dict(alternative='None! T_T')) + + assert exc.match('`version` is missing in deprecate option') + + +def test_deprecate_option_no_alternative(caplog): + """Verify option deprecation when there is no alternative.""" + config = Configuration() + config.add_option( + 'option', option_type=str, default='hello', + deprecate=dict(version='v1.0')) + + # Access the deprecated option and trigger a warning. 
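+ # With no alternative provided, the warning asserted below should omit the + # 'Use `...` instead.' suffix.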
+ with caplog.at_level(logging.WARNING, logger="orion.core.io.config"): + assert config.option == 'hello' + + assert caplog.record_tuples == [( + 'orion.core.io.config', logging.WARNING, + '(DEPRECATED) Option `option` will be removed in v1.0.')] + + +def test_deprecate_option_help(): + """Verify help message of a deprecated option.""" + config = Configuration() + config.add_option( + 'option', option_type=str, + deprecate=dict(version='v1.0', alternative='None! T_T'), + help='A useless option!') + + assert config.help('option') == '(DEPRECATED) A useless option!' + + +def test_deprecate_option_print_with_different_name(caplog): + """Verify deprecation warning with a different name (for nested options).""" + config = Configuration() + config.add_option( + 'option', option_type=str, default='hello', + deprecate=dict(version='v1.0', alternative='None! T_T', name='nested.option')) + + # Access the deprecated option and trigger a warning. + with caplog.at_level(logging.WARNING, logger="orion.core.io.config"): + assert config.option == 'hello' + + assert caplog.record_tuples == [( + 'orion.core.io.config', logging.WARNING, + '(DEPRECATED) Option `nested.option` will be removed in v1.0. Use `None! T_T` instead.')] + + +def test_get_deprecated_key(caplog): + """Verify deprecation warning using get().""" + config = Configuration() + config.add_option( + 'option', option_type=str, default='hello', + deprecate=dict(version='v1.0', alternative='None! T_T')) + + # Access the deprecated option and trigger a warning. + with caplog.at_level(logging.WARNING, logger="orion.core.io.config"): + assert config.get('option') == 'hello' + + assert caplog.record_tuples == [( + 'orion.core.io.config', logging.WARNING, + '(DEPRECATED) Option `option` will be removed in v1.0. Use `None! T_T` instead.')] + + +def test_get_deprecated_key_ignore_warning(caplog): + """Verify that no deprecation warning is emitted when using get(deprecated='ignore').""" + config = Configuration() + config.add_option( + 'option', option_type=str, default='hello', + deprecate=dict(version='v1.0', alternative='None! T_T')) + + # Access the deprecated option with deprecated='ignore'; no warning should be emitted. 
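+ # caplog captures records from the config logger; with deprecated='ignore' + # the record list asserted below is expected to remain empty.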
+ with caplog.at_level(logging.WARNING, logger="orion.core.io.config"): + assert config.get('option', deprecated='ignore') == 'hello' + + assert caplog.record_tuples == [] diff --git a/tests/unittests/core/io/test_experiment_builder.py b/tests/unittests/core/io/test_experiment_builder.py index 9cac1dc7e..e4c17066e 100644 --- a/tests/unittests/core/io/test_experiment_builder.py +++ b/tests/unittests/core/io/test_experiment_builder.py @@ -1,315 +1,925 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Example usage and tests for :mod:`orion.core.io.experiment_builder`.""" -import pytest - -from orion.core.io.experiment_builder import ExperimentBuilder -from orion.core.utils.exceptions import NoConfigurationError - - -def get_db(exp): - """Transitional method to move away from mongodb""" - return exp._storage._db - - -def get_view_db(exp): - """Transitional method to move away from mongodb""" - return exp._experiment._storage._storage._db +import copy +import datetime +import pytest -@pytest.mark.usefixtures("clean_db") -def test_fetch_local_config(config_file): - """Test local config (default, env_vars, cmdconfig, cmdargs)""" +from orion.algo.base import BaseAlgorithm +from orion.algo.space import Space +from orion.core.evc.adapters import BaseAdapter +from orion.core.io.database.ephemeraldb import EphemeralDB +from orion.core.io.database.pickleddb import PickledDB +import orion.core.io.experiment_builder as experiment_builder +import orion.core.utils.backward as backward +from orion.core.utils.exceptions import BranchingEvent, NoConfigurationError, RaceCondition +from orion.core.utils.tests import OrionState, update_singletons +from orion.storage.base import get_storage +from orion.storage.legacy import Legacy + + +def count_experiments(): + """Count experiments in storage""" + return len(get_storage().fetch_experiments({})) + + +@pytest.fixture +def space(): + """Build a space definition""" + return {'x': 'uniform(-50,50)'} + + +@pytest.fixture() +def python_api_config(): + """Create a configuration without the cli fluff.""" + new_config = dict( + name='supernaekei', + version=1, + space={'x': 'uniform(0,10)'}, + metadata={'user': 'tsirif', + 'orion_version': 'XYZ', + 'VCS': {"type": "git", + "is_dirty": False, + "HEAD_sha": "test", + "active_branch": None, + "diff_sha": "diff"}}, + max_trials=1000, + working_dir='', + algorithms={ + 'dumbalgo': { + 'done': False, + 'judgement': None, + 'scoring': 0, + 'seed': None, + 'suspend': False, + 'value': 5}}, + producer={'strategy': 'NoParallelStrategy'}, + _id='fasdfasfa', + something_to_be_ignored='asdfa', + refers=dict( + root_id='supernaekei', + parent_id=None, + adapter=[]) + ) + + return new_config + + +@pytest.fixture() +def new_config(random_dt, script_path): + """Create a configuration that will not hit the database.""" + new_config = dict( + name='supernaekei', + metadata={'user': 'tsirif', + 'orion_version': 'XYZ', + 'user_script': script_path, + 'user_config': 'abs_path/hereitis.yaml', + 'user_args': [script_path, '--mini-batch~uniform(32, 256, discrete=True)'], + 'VCS': {"type": "git", + "is_dirty": False, + "HEAD_sha": "test", + "active_branch": None, + "diff_sha": "diff"}}, + version=1, + pool_size=10, + max_trials=1000, + working_dir='', + algorithms={ + 'dumbalgo': { + 'done': False, + 'judgement': None, + 'scoring': 0, + 'seed': None, + 'suspend': False, + 'value': 5}}, + producer={'strategy': 'NoParallelStrategy'}, + # attrs starting with '_' also + _id='fasdfasfa', + # and in general anything which is not in Experiment's slots 
+ something_to_be_ignored='asdfa', + refers=dict( + root_id='supernaekei', + parent_id=None, + adapter=[]) + ) + + backward.populate_space(new_config) + + return new_config + + +@pytest.fixture +def parent_version_config(): + """Return a configuration for an experiment.""" + config = dict( + _id='parent_config', + name="old_experiment", + version=1, + algorithms='random', + metadata={'user': 'corneauf', 'datetime': datetime.datetime.utcnow(), + 'user_args': ['--x~normal(0,1)']}) + + backward.populate_space(config) + + return config + + +@pytest.fixture +def child_version_config(parent_version_config): + """Return a configuration for an experiment.""" + config = copy.deepcopy(parent_version_config) + config['_id'] = 'child_config' + config['version'] = 2 + config['refers'] = {'parent_id': 'parent_config'} + config['metadata']['datetime'] = datetime.datetime.utcnow() + config['metadata']['user_args'].append('--y~+normal(0,1)') + backward.populate_space(config) + return config + + +@pytest.mark.usefixtures('with_user_tsirif', 'version_XYZ') +def test_get_cmd_config(config_file): + """Test local config (cmdconfig, cmdargs)""" cmdargs = {"config": config_file} - local_config = ExperimentBuilder().fetch_full_config(cmdargs, use_db=False) + local_config = experiment_builder.get_cmd_config(cmdargs) assert local_config['algorithms'] == 'random' - assert local_config['database']['host'] == 'mongodb://user:pass@localhost' - assert local_config['database']['name'] == 'orion_test' - assert local_config['database']['type'] == 'mongodb' + assert local_config['strategy'] == 'NoParallelStrategy' assert local_config['max_trials'] == 100 assert local_config['name'] == 'voila_voici' assert local_config['pool_size'] == 1 - - -@pytest.mark.usefixtures("clean_db", "test_config") -def test_fetch_local_config_from_incomplete_config(incomplete_config_file): + assert local_config['storage'] == { + 'database': { + 'host': 'mongodb://user:pass@localhost', + 'name': 'orion_test', + 'type': 'mongodb'}} + assert local_config['metadata'] == { + 'orion_version': 'XYZ', + 'user': 'tsirif'} + + +@pytest.mark.usefixtures('with_user_tsirif', 'version_XYZ') +def test_get_cmd_config_from_incomplete_config(incomplete_config_file): """Test local config with incomplete user configuration file (default, env_vars, cmdconfig, cmdargs) This is to ensure merge_configs update properly the subconfigs """ cmdargs = {"config": incomplete_config_file} - local_config = ExperimentBuilder().fetch_full_config(cmdargs, use_db=False) - - assert local_config['algorithms'] == 'random' - assert local_config['database']['host'] == 'mongodb://user:pass@localhost' - assert local_config['database']['name'] == 'orion_test' - assert local_config['database']['type'] == 'incomplete' - assert local_config['max_trials'] == float('inf') + local_config = experiment_builder.get_cmd_config(cmdargs) + + assert 'algorithms' not in local_config + assert 'max_trials' not in local_config + assert 'pool_size' not in local_config + assert 'name' not in local_config['storage']['database'] + assert local_config['storage']['database']['host'] == 'mongodb://user:pass@localhost' + assert local_config['storage']['database']['type'] == 'incomplete' assert local_config['name'] == 'incomplete' - assert local_config['pool_size'] == 1 + assert local_config['metadata'] == { + 'orion_version': 'XYZ', + 'user': 'tsirif'} -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_tsirif") -def test_fetch_config_from_db_no_hit(config_file, random_dt): +def 
test_fetch_config_from_db_no_hit(): """Verify that fetch_config_from_db returns an empty dict when the experiment is not in db""" - cmdargs = {'name': 'supernaekei', 'config': config_file} - db_config = ExperimentBuilder().fetch_config_from_db(cmdargs) + with OrionState(experiments=[], trials=[]): + db_config = experiment_builder.fetch_config_from_db(name='supernaekei') + assert db_config == {} -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_tsirif") -def test_fetch_config_from_db_hit(config_file, exp_config, random_dt): +@pytest.mark.usefixtures('with_user_tsirif') +def test_fetch_config_from_db_hit(new_config): """Verify db config when experiment is in db""" - cmdargs = {'name': 'supernaedo2-dendi', 'config': config_file} - db_config = ExperimentBuilder().fetch_config_from_db(cmdargs) - - assert db_config['name'] == exp_config[0][0]['name'] - assert db_config['refers'] == exp_config[0][0]['refers'] - assert db_config['metadata'] == exp_config[0][0]['metadata'] - assert db_config['pool_size'] == exp_config[0][0]['pool_size'] - assert db_config['max_trials'] == exp_config[0][0]['max_trials'] - assert db_config['algorithms'] == exp_config[0][0]['algorithms'] - - -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_dendi", - "mock_infer_versioning_metadata") -def test_fetch_full_config_new_config(config_file, exp_config, random_dt, script_path): - """Verify full config with new config (causing branch)""" - cmdargs = {'name': 'supernaedo2-dendi', - 'config': config_file, - 'user_args': [script_path, - "--encoding_layer~choices(['rnn', 'lstm', 'gru'])", - "--decoding_layer~choices(['rnn', 'lstm_with_attention', 'gru'])"]} - full_config = ExperimentBuilder().fetch_full_config(cmdargs) - cmdconfig = ExperimentBuilder().fetch_file_config(cmdargs) - - full_config['metadata']['orion_version'] = exp_config[0][0]['metadata']['orion_version'] - - assert full_config['name'] == exp_config[0][0]['name'] - assert full_config['refers'] == exp_config[0][0]['refers'] - assert full_config['metadata'] == exp_config[0][0]['metadata'] - assert full_config['pool_size'] == cmdconfig['pool_size'] - assert full_config['max_trials'] == cmdconfig['max_trials'] - assert full_config['algorithms'] == cmdconfig['algorithms'] - - -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_dendi", - "mock_infer_versioning_metadata") -def test_fetch_full_config_old_config(old_config_file, exp_config, random_dt, script_path): - """Verify full config with old config (not causing branch)""" - cmdargs = {'name': 'supernaedo2-dendi', - 'config': old_config_file, - 'user_args': [script_path, - "--encoding_layer~choices(['rnn', 'lstm', 'gru'])", - "--decoding_layer~choices(['rnn', 'lstm_with_attention', 'gru'])"]} - - full_config = ExperimentBuilder().fetch_full_config(cmdargs) - cmdconfig = ExperimentBuilder().fetch_file_config(cmdargs) - - full_config['metadata']['orion_version'] = exp_config[0][0]['metadata']['orion_version'] + with OrionState(experiments=[new_config], trials=[]): + db_config = experiment_builder.fetch_config_from_db(name='supernaekei') - assert full_config['name'] == exp_config[0][0]['name'] - assert full_config['refers'] == exp_config[0][0]['refers'] - assert full_config['metadata'] == exp_config[0][0]['metadata'] - assert full_config['pool_size'] == cmdconfig['pool_size'] - assert full_config['max_trials'] == cmdconfig['max_trials'] - assert full_config['algorithms'] == cmdconfig['algorithms'] + assert db_config['name'] == new_config['name'] + assert 
db_config['refers'] == new_config['refers']
+    assert db_config['metadata'] == new_config['metadata']
+    assert db_config['pool_size'] == new_config['pool_size']
+    assert db_config['max_trials'] == new_config['max_trials']
+    assert db_config['algorithms'] == new_config['algorithms']


-@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_tsirif")
-def test_fetch_full_config_no_hit(config_file, exp_config, random_dt):
-    """Verify full config when experiment not in db"""
+@pytest.mark.usefixtures("with_user_tsirif")
+def test_build_view_from_args_no_hit(config_file):
+    """Try building experiment view when not in db"""
     cmdargs = {'name': 'supernaekei', 'config': config_file}
-    full_config = ExperimentBuilder().fetch_full_config(cmdargs)
-
-    assert full_config['name'] == 'supernaekei'
-    assert full_config['algorithms'] == 'random'
-    assert full_config['max_trials'] == 100
-    assert full_config['name'] == 'supernaekei'
-    assert full_config['pool_size'] == 1
-    assert full_config['metadata']['user'] == 'tsirif'
-    assert 'datetime' not in full_config['metadata']
-    assert 'refers' not in full_config
+    with OrionState(experiments=[], trials=[]):
+        with pytest.raises(ValueError) as exc_info:
+            experiment_builder.build_view_from_args(cmdargs)
+        assert "No experiment with given name 'supernaekei' and version '*'" in str(exc_info.value)


-@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_tsirif")
-def test_build_view_from_no_hit(config_file, create_db_instance, exp_config):
-    """Try building experiment view when not in db"""
+@pytest.mark.usefixtures("with_user_tsirif")
+def test_build_view_from_args_hit(config_file, random_dt, new_config):
+    """Try building experiment view when in db"""
     cmdargs = {'name': 'supernaekei', 'config': config_file}
-    with pytest.raises(ValueError) as exc_info:
-        ExperimentBuilder().build_view_from(cmdargs)
-    assert "No experiment with given name 'supernaekei' for user 'tsirif'" in str(exc_info.value)
+    with OrionState(experiments=[new_config], trials=[]):
+        exp_view = experiment_builder.build_view_from_args(cmdargs)

+    assert exp_view._id == new_config['_id']
+    assert exp_view.name == new_config['name']
+    assert exp_view.configuration['refers'] == new_config['refers']
+    assert exp_view.metadata == new_config['metadata']
+    assert exp_view.pool_size == new_config['pool_size']
+    assert exp_view.max_trials == new_config['max_trials']
+    assert exp_view.algorithms.configuration == new_config['algorithms']

-@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_tsirif")
-def test_build_view_from(config_file, create_db_instance, exp_config, random_dt):
-    """Try building experiment view when in db"""
-    cmdargs = {'name': 'supernaedo2-dendi', 'config': config_file}
-    exp_view = ExperimentBuilder().build_view_from(cmdargs)
-
-    assert exp_view._experiment._init_done is False
-    assert get_view_db(exp_view) is create_db_instance
-    assert exp_view._id == exp_config[0][0]['_id']
-    assert exp_view.name == exp_config[0][0]['name']
-    assert exp_view.configuration['refers'] == exp_config[0][0]['refers']
-    assert exp_view.metadata == exp_config[0][0]['metadata']
-    assert exp_view.pool_size == exp_config[0][0]['pool_size']
-    assert exp_view.max_trials == exp_config[0][0]['max_trials']
-    # TODO: Views are not fully configured until configuration is refactored
-    # assert exp_view.algorithms.configuration == exp_config[0][0]['algorithms']
-
-
-@pytest.mark.usefixtures("clean_db", "null_db_instances", 
"with_user_dendi") -def test_build_from_no_hit(config_file, create_db_instance, exp_config, random_dt, script_path): + +@pytest.mark.usefixtures("with_user_dendi") +def test_build_from_args_no_hit(config_file, random_dt, script_path, new_config): """Try building experiment when not in db""" cmdargs = {'name': 'supernaekei', 'config': config_file, 'user_args': [script_path, 'x~uniform(0,10)']} - with pytest.raises(ValueError) as exc_info: - ExperimentBuilder().build_view_from(cmdargs) - assert "No experiment with given name 'supernaekei' for user 'dendi'" in str(exc_info.value) + with OrionState(experiments=[], trials=[]): + with pytest.raises(ValueError) as exc_info: + experiment_builder.build_view_from_args(cmdargs) + assert "No experiment with given name 'supernaekei' and version '*'" in str(exc_info.value) - exp = ExperimentBuilder().build_from(cmdargs) + exp = experiment_builder.build_from_args(cmdargs) - assert exp._init_done is True - assert get_db(exp) is create_db_instance assert exp.name == cmdargs['name'] assert exp.configuration['refers'] == {'adapter': [], 'parent_id': None, 'root_id': exp._id} assert exp.metadata['datetime'] == random_dt assert exp.metadata['user'] == 'dendi' assert exp.metadata['user_script'] == cmdargs['user_args'][0] - assert exp.metadata['user_args'] == cmdargs['user_args'][1:] + assert exp.metadata['user_args'] == cmdargs['user_args'] assert exp.pool_size == 1 assert exp.max_trials == 100 assert exp.algorithms.configuration == {'random': {'seed': None}} -@pytest.mark.usefixtures("version_XYZ", "clean_db", "null_db_instances", "with_user_dendi", - "mock_infer_versioning_metadata") -def test_build_from_hit(old_config_file, create_db_instance, exp_config, script_path): +@pytest.mark.usefixtures("version_XYZ", "with_user_tsirif", "mock_infer_versioning_metadata") +def test_build_from_args_hit(old_config_file, script_path, new_config): """Try building experiment when in db (no branch)""" - cmdargs = {'name': 'supernaedo2-dendi', + cmdargs = {'name': 'supernaekei', 'config': old_config_file, 'user_args': [script_path, - "--encoding_layer~choices(['rnn', 'lstm', 'gru'])", - "--decoding_layer~choices(['rnn', 'lstm_with_attention', 'gru'])"]} - - # Test that experiment already exists - ExperimentBuilder().build_view_from(cmdargs) - exp = ExperimentBuilder().build_from(cmdargs) - - assert exp._init_done is True - assert get_db(exp) is create_db_instance - assert exp._id == exp_config[0][0]['_id'] - assert exp.name == exp_config[0][0]['name'] - assert exp.configuration['refers'] == exp_config[0][0]['refers'] - assert exp.metadata == exp_config[0][0]['metadata'] - assert exp.pool_size == exp_config[0][0]['pool_size'] - assert exp.max_trials == exp_config[0][0]['max_trials'] - assert exp.algorithms.configuration == exp_config[0][0]['algorithms'] - - -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_bouthilx") -def test_build_from_force_user(old_config_file, create_db_instance, exp_config, random_dt): + '--mini-batch~uniform(32, 256, discrete=True)']} + + with OrionState(experiments=[new_config], trials=[]): + # Test that experiment already exists + experiment_builder.build_view_from_args(cmdargs) + + exp = experiment_builder.build_from_args(cmdargs) + + assert exp._id == new_config['_id'] + assert exp.name == new_config['name'] + assert exp.version == 1 + assert exp.configuration['refers'] == new_config['refers'] + assert exp.metadata == new_config['metadata'] + assert exp.max_trials == new_config['max_trials'] + assert exp.algorithms.configuration 
== new_config['algorithms']
+
+
+@pytest.mark.usefixtures("with_user_bouthilx")
+def test_build_from_args_force_user(new_config):
     """Try building experiment view when in db"""
-    cmdargs = {'name': 'supernaedo2', 'config': old_config_file}
+    cmdargs = {'name': new_config['name']}
     cmdargs['user'] = 'tsirif'
-    exp_view = ExperimentBuilder().build_from(cmdargs)
+    with OrionState(experiments=[new_config], trials=[]):
+        # Test that experiment already exists
+        exp_view = experiment_builder.build_from_args(cmdargs)

     assert exp_view.metadata['user'] == 'tsirif'


-@pytest.mark.usefixtures("version_XYZ", "clean_db", "null_db_instances", "with_user_tsirif")
-def test_build_from_config_no_hit(config_file, create_db_instance, exp_config, random_dt,
-                                  script_path):
+@pytest.mark.usefixtures("setup_pickleddb_database")
+def test_build_from_args_debug_mode(script_path):
+    """Try building experiment in debug mode"""
+    update_singletons()
+    experiment_builder.build_from_args(
+        {'name': 'whatever',
+         'user_args': [script_path, '--mini-batch~uniform(32, 256)']})
+
+    storage = get_storage()
+
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+
+    update_singletons()
+
+    experiment_builder.build_from_args(
+        {'name': 'whatever',
+         'user_args': [script_path, '--mini-batch~uniform(32, 256)'],
+         'debug': True})
+    storage = get_storage()
+
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, EphemeralDB)
+
+
+@pytest.mark.usefixtures("setup_pickleddb_database")
+def test_build_view_from_args_debug_mode(script_path):
+    """Try building experiment view in debug mode"""
+    update_singletons()
+
+    # Can't build a view if none exists. That's fine; we only want to test the storage creation.
+    with pytest.raises(ValueError):
+        experiment_builder.build_view_from_args({'name': 'whatever'})
+
+    storage = get_storage()
+
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+
+    update_singletons()
+
+    # Can't build a view if none exists. That's fine; we only want to test the storage creation.
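+    # With 'debug': True the storage should fall back to a non-persistent
+    # in-memory EphemeralDB instead of the PickledDB set up by the fixture.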
+ with pytest.raises(ValueError): + experiment_builder.build_view_from_args({'name': 'whatever', 'debug': True}) + + storage = get_storage() + + assert isinstance(storage, Legacy) + assert isinstance(storage._db, EphemeralDB) + + +@pytest.mark.usefixtures("with_user_tsirif", "version_XYZ") +def test_build_no_hit(config_file, random_dt, script_path): """Try building experiment from config when not in db""" - cmdargs = {'name': 'supernaekei', 'config': config_file, - 'user_args': [script_path, - '-x~uniform(0,10)']} + name = 'supernaekei' + space = {'x': 'uniform(0, 10)'} + max_trials = 100 - with pytest.raises(ValueError) as exc_info: - ExperimentBuilder().build_view_from(cmdargs) - assert "No experiment with given name 'supernaekei' for user 'tsirif'" in str(exc_info.value) + with OrionState(experiments=[], trials=[]): - full_config = ExperimentBuilder().fetch_full_config(cmdargs) - exp = ExperimentBuilder().build_from_config(full_config) + with pytest.raises(ValueError) as exc_info: + experiment_builder.build_view(name) + assert "No experiment with given name 'supernaekei' and version '*'" in str(exc_info.value) - assert exp._init_done is True - assert get_db(exp) is create_db_instance - assert exp.name == cmdargs['name'] + exp = experiment_builder.build(name, space=space, max_trials=max_trials) + + assert exp.name == name assert exp.configuration['refers'] == {'adapter': [], 'parent_id': None, 'root_id': exp._id} - assert exp.metadata['datetime'] == random_dt - assert exp.metadata['user'] == 'tsirif' - assert exp.metadata['user_script'] == cmdargs['user_args'][0] - assert exp.metadata['user_args'] == cmdargs['user_args'][1:] - assert exp.pool_size == 1 - assert exp.max_trials == 100 + assert exp.metadata == { + 'datetime': random_dt, + 'user': 'tsirif', + 'orion_version': 'XYZ'} + assert exp.configuration['space'] == space + assert exp.max_trials == max_trials assert not exp.is_done assert exp.algorithms.configuration == {'random': {'seed': None}} -@pytest.mark.usefixtures("clean_db") -def test_build_from_config_no_commandline_config(config_file): +def test_build_no_commandline_config(): """Try building experiment with no commandline configuration.""" - cmdargs = {'name': 'supernaekei', 'config': config_file} - full_config = ExperimentBuilder().fetch_full_config(cmdargs) - - with pytest.raises(NoConfigurationError): - ExperimentBuilder().build_from_config(full_config) + with OrionState(experiments=[], trials=[]): + with pytest.raises(NoConfigurationError): + experiment_builder.build('supernaekei') -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_tsirif") -def test_build_from_config_hit(old_config_file, create_db_instance, exp_config, script_path): +@pytest.mark.usefixtures("with_user_dendi", "mock_infer_versioning_metadata", "version_XYZ") +def test_build_hit(python_api_config): """Try building experiment from config when in db (no branch)""" - cmdargs = {'name': 'supernaedo2-dendi', - 'config': old_config_file, - 'user_args': [script_path, - "--encoding_layer~choices(['rnn', 'lstm', 'gru'])", - "--decoding_layer~choices(['rnn', 'lstm_with_attention', 'gru'])"]} + name = 'supernaekei' + + with OrionState(experiments=[python_api_config], trials=[]): - # Test that experiment already exists - ExperimentBuilder().build_view_from(cmdargs) + # Test that experiment already exists (this should fail otherwise) + experiment_builder.build_view(name=name) - exp_view = ExperimentBuilder().build_view_from(cmdargs) - exp = 
ExperimentBuilder().build_from_config(exp_view.configuration) + exp = experiment_builder.build(**python_api_config) - assert exp._init_done is True - assert get_db(exp) is create_db_instance - assert exp._id == exp_config[0][0]['_id'] - assert exp.name == exp_config[0][0]['name'] - assert exp.configuration['refers'] == exp_config[0][0]['refers'] - assert exp.metadata == exp_config[0][0]['metadata'] - assert exp.pool_size == exp_config[0][0]['pool_size'] - assert exp.max_trials == exp_config[0][0]['max_trials'] - assert exp.algorithms.configuration == exp_config[0][0]['algorithms'] + assert exp._id == python_api_config['_id'] + assert exp.name == python_api_config['name'] + assert exp.configuration['refers'] == python_api_config['refers'] + python_api_config['metadata']['user'] = 'dendi' + assert exp.metadata == python_api_config['metadata'] + assert exp.max_trials == python_api_config['max_trials'] + assert exp.algorithms.configuration == python_api_config['algorithms'] -@pytest.mark.usefixtures("clean_db", "null_db_instances", "with_user_dendi") -def test_build_without_config_hit(old_config_file, create_db_instance, exp_config, script_path): +@pytest.mark.usefixtures("with_user_tsirif", "version_XYZ") +def test_build_without_config_hit(python_api_config): """Try building experiment without commandline config when in db (no branch)""" - cmdargs = {'name': 'supernaedo2-dendi', - 'config': old_config_file, - 'user_args': [script_path, - "--encoding_layer~choices(['rnn', 'lstm', 'gru'])", - "--decoding_layer~choices(['rnn', 'lstm_with_attention', 'gru'])"]} - - # Test that experiment already exists - ExperimentBuilder().build_view_from(cmdargs) - - cmdargs = {'name': 'supernaedo2-dendi'} - - exp_view = ExperimentBuilder().build_view_from(cmdargs) - exp = ExperimentBuilder().build_from_config(exp_view.configuration) - - assert exp._init_done is True - assert get_db(exp) is create_db_instance - assert exp._id == exp_config[0][0]['_id'] - assert exp.name == exp_config[0][0]['name'] - assert exp.configuration['refers'] == exp_config[0][0]['refers'] - assert exp.metadata == exp_config[0][0]['metadata'] - assert exp.pool_size == exp_config[0][0]['pool_size'] - assert exp.max_trials == exp_config[0][0]['max_trials'] - assert exp.algorithms.configuration == exp_config[0][0]['algorithms'] + name = 'supernaekei' + + with OrionState(experiments=[python_api_config], trials=[]): + + # Test that experiment already exists (this should fail otherwise) + experiment_builder.build_view(name=name) + + exp = experiment_builder.build(name=name) + + assert exp._id == python_api_config['_id'] + assert exp.name == python_api_config['name'] + assert exp.configuration['refers'] == python_api_config['refers'] + assert exp.metadata == python_api_config['metadata'] + assert exp.max_trials == python_api_config['max_trials'] + assert exp.algorithms.configuration == python_api_config['algorithms'] + + +@pytest.mark.usefixtures("with_user_tsirif", "version_XYZ") +def test_build_from_args_without_cmd(old_config_file, script_path, new_config): + """Try building experiment without commandline when in db (no branch)""" + name = 'supernaekei' + + cmdargs = {'name': name, + 'config': old_config_file} + + with OrionState(experiments=[new_config], trials=[]): + # Test that experiment already exists (this should fail otherwise) + experiment_builder.build_view_from_args(cmdargs) + + exp = experiment_builder.build_from_args(cmdargs) + + assert exp._id == new_config['_id'] + assert exp.name == new_config['name'] + assert 
exp.configuration['refers'] == new_config['refers']
+        assert exp.metadata == new_config['metadata']
+        assert exp.max_trials == new_config['max_trials']
+        assert exp.algorithms.configuration == new_config['algorithms']
+
+
+@pytest.mark.usefixtures("with_user_tsirif")
+class TestExperimentVersioning(object):
+    """Create new Experiment with auto-versioning."""
+
+    def test_new_experiment_wout_version(self, space):
+        """Create a new and never-seen-before experiment without a version."""
+        with OrionState():
+            exp = experiment_builder.build(name="exp_wout_version", space=space)
+
+        assert exp.version == 1
+
+    def test_new_experiment_w_version(self, space):
+        """Create a new and never-seen-before experiment with a version."""
+        with OrionState():
+            exp = experiment_builder.build(name="exp_wout_version", version=1, space=space)
+
+        assert exp.version == 1
+
+    def test_backward_compatibility_no_version(self, parent_version_config):
+        """Branch from parent that has no version field."""
+        parent_version_config.pop('version')
+        with OrionState(experiments=[parent_version_config]):
+            exp = experiment_builder.build(name=parent_version_config["name"],
+                                           space={'y': 'uniform(0, 10)'})
+
+        assert exp.version == 2
+
+    def test_old_experiment_wout_version(self, parent_version_config):
+        """Create an already existing experiment without a version."""
+        with OrionState(experiments=[parent_version_config]):
+            exp = experiment_builder.build(name=parent_version_config["name"])
+
+        assert exp.version == 1
+
+    def test_old_experiment_2_wout_version(self, parent_version_config, child_version_config):
+        """Create an already existing experiment without a version and get the last one."""
+        with OrionState(experiments=[parent_version_config, child_version_config]):
+            exp = experiment_builder.build(name=parent_version_config["name"])
+
+        assert exp.version == 2
+
+    def test_old_experiment_w_version(self, parent_version_config, child_version_config):
+        """Create an already existing experiment with a version."""
+        with OrionState(experiments=[parent_version_config, child_version_config]):
+            exp = experiment_builder.build(name=parent_version_config["name"], version=1)
+
+        assert exp.version == 1
+
+    def test_old_experiment_w_version_bigger_than_max(self, parent_version_config,
+                                                      child_version_config):
+        """Create an already existing experiment with a too large version."""
+        with OrionState(experiments=[parent_version_config, child_version_config]):
+            exp = experiment_builder.build(name=parent_version_config["name"], version=8)
+
+        assert exp.version == 2
+
+
+@pytest.mark.usefixtures("with_user_tsirif", "version_XYZ")
+class TestBuild(object):
+    """Test building the experiment"""
+
+    def test_good_set_before_init_hit_no_diffs_exc_max_trials(self, new_config):
+        """Trying to set, and NO differences were found from the config pulled from db.
+
+        Everything is normal, nothing changes. Experiment is resumed,
+        perhaps with more trials to evaluate (an exception is 'max_trials').
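+
+        In the test below, 'max_trials' is raised from 1000 to 5000 and the
+        experiment is expected to resume rather than branch.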
+        """
+        with OrionState(experiments=[new_config], trials=[]):
+
+            new_config['max_trials'] = 5000
+
+            exp = experiment_builder.build(**new_config)
+
+        # Deliver an external configuration to finalize init
+        new_config['algorithms']['dumbalgo']['done'] = False
+        new_config['algorithms']['dumbalgo']['judgement'] = None
+        new_config['algorithms']['dumbalgo']['scoring'] = 0
+        new_config['algorithms']['dumbalgo']['suspend'] = False
+        new_config['algorithms']['dumbalgo']['value'] = 5
+        new_config['algorithms']['dumbalgo']['seed'] = None
+        new_config['producer']['strategy'] = "NoParallelStrategy"
+        new_config.pop('something_to_be_ignored')
+        assert exp.configuration == new_config
+
+    def test_good_set_before_init_no_hit(self, random_dt, new_config):
+        """Trying to set, overwrite everything from input."""
+        with OrionState(experiments=[], trials=[]):
+            exp = experiment_builder.build(**new_config)
+            found_config = list(get_storage().fetch_experiments({'name': 'supernaekei',
+                                                                 'metadata.user': 'tsirif'}))
+
+        new_config['metadata']['datetime'] = exp.metadata['datetime']
+
+        assert len(found_config) == 1
+        _id = found_config[0].pop('_id')
+        assert _id != 'fasdfasfa'
+        assert exp._id == _id
+        new_config['refers'] = {}
+        new_config.pop('_id')
+        new_config.pop('something_to_be_ignored')
+        new_config['algorithms']['dumbalgo']['done'] = False
+        new_config['algorithms']['dumbalgo']['judgement'] = None
+        new_config['algorithms']['dumbalgo']['scoring'] = 0
+        new_config['algorithms']['dumbalgo']['suspend'] = False
+        new_config['algorithms']['dumbalgo']['value'] = 5
+        new_config['algorithms']['dumbalgo']['seed'] = None
+        new_config['refers'] = {'adapter': [], 'parent_id': None, 'root_id': _id}
+        assert found_config[0] == new_config
+        assert exp.name == new_config['name']
+        assert exp.configuration['refers'] == new_config['refers']
+        assert exp.metadata == new_config['metadata']
+        assert exp.pool_size == new_config['pool_size']
+        assert exp.max_trials == new_config['max_trials']
+        assert exp.working_dir == new_config['working_dir']
+        assert exp.version == new_config['version']
+        assert exp.algorithms.configuration == new_config['algorithms']
+
+    def test_working_dir_is_correctly_set(self, new_config):
+        """Check if working_dir is correctly changed."""
+        with OrionState():
+            new_config['working_dir'] = './'
+            exp = experiment_builder.build(**new_config)
+            storage = get_storage()
+            found_config = list(storage.fetch_experiments({'name': 'supernaekei',
+                                                           'metadata.user': 'tsirif'}))
+
+        found_config = found_config[0]
+        exp = experiment_builder.build(**found_config)
+        assert exp.working_dir == './'
+
+    def test_working_dir_works_when_db_absent(self, database, new_config):
+        """Check that working_dir is correctly set when absent from the database."""
+        with OrionState(experiments=[], trials=[]):
+            exp = experiment_builder.build(**new_config)
+            storage = get_storage()
+            found_config = list(storage.fetch_experiments({'name': 'supernaekei',
+                                                           'metadata.user': 'tsirif'}))
+
+        found_config = found_config[0]
+        exp = experiment_builder.build(**found_config)
+        assert exp.working_dir == ''
+
+    def test_configuration_hit_no_diffs(self, new_config):
+        """Return a configuration dict according to an experiment object.
+
+        Before initialization is done, it can be the case that the pair (`name`,
+        user's name) has not hit the database. Return a yaml-compliant form
+        of the current state, to be used with :mod:`orion.core.cli.resolve_config`.
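+
+        Here the identical configuration is already registered in the database,
+        so building it again should not register a new experiment.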
+        """
+        with OrionState(experiments=[new_config], trials=[]):
+            experiment_count_before = count_experiments()
+            exp = experiment_builder.build(**new_config)
+            assert experiment_count_before == count_experiments()
+
+        new_config['algorithms']['dumbalgo']['done'] = False
+        new_config['algorithms']['dumbalgo']['judgement'] = None
+        new_config['algorithms']['dumbalgo']['scoring'] = 0
+        new_config['algorithms']['dumbalgo']['suspend'] = False
+        new_config['algorithms']['dumbalgo']['value'] = 5
+        new_config['algorithms']['dumbalgo']['seed'] = None
+        new_config['producer']['strategy'] = "NoParallelStrategy"
+        new_config.pop('something_to_be_ignored')
+        assert exp.configuration == new_config
+
+    def test_instantiation_after_init(self, new_config):
+        """Verify that algo, space and refers were instantiated properly"""
+        with OrionState(experiments=[new_config], trials=[]):
+            exp = experiment_builder.build(**new_config)
+
+        assert isinstance(exp.algorithms, BaseAlgorithm)
+        assert isinstance(exp.space, Space)
+        assert isinstance(exp.refers['adapter'], BaseAdapter)
+
+    def test_algo_case_insensitive(self, new_config):
+        """Verify that algo with uppercase or lowercase leads to the same experiment"""
+        with OrionState(experiments=[new_config], trials=[]):
+            new_config['algorithms']['DUMBALGO'] = new_config['algorithms'].pop('dumbalgo')
+            exp = experiment_builder.build(**new_config)
+
+            assert exp.version == 1
+
+    def test_hierarchical_space(self, new_config):
+        """Verify space can have hierarchical structure"""
+        space = {'a': {'x': 'uniform(0, 10, discrete=True)'},
+                 'b': {'y': 'loguniform(1e-08, 1)',
+                       'z': 'choices([\'voici\', \'voila\', 2])'}}
+
+        with OrionState(experiments=[], trials=[]):
+            exp = experiment_builder.build('hierarchy', space=space)
+
+            exp2 = experiment_builder.build('hierarchy')
+
+        assert 'a.x' in exp.space
+        assert 'b.y' in exp.space
+        assert 'b.z' in exp.space
+
+        # Make sure it can be fetched properly from db as well
+        assert 'a.x' in exp2.space
+        assert 'b.y' in exp2.space
+        assert 'b.z' in exp2.space
+
+    def test_try_set_after_race_condition(self, new_config, monkeypatch):
+        """Cannot set a configuration after init if it loses a race
+        condition.
+
+        The experiment from the process which first writes to the db is initialized
+        properly. The experiment which loses the race condition cannot be
+        initialized and needs to be rebuilt.
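+
+        The mock below simulates losing the race: the first call to
+        ``fetch_config_from_db`` returns an empty config, as if the experiment
+        were not registered yet, and later calls return the registered config.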
+        """
+        with OrionState(experiments=[new_config], trials=[]):
+            experiment_count_before = count_experiments()
+
+            def insert_race_condition(*args, **kwargs):
+                if insert_race_condition.count == 0:
+                    data = {}
+                else:
+                    data = new_config
+
+                insert_race_condition.count += 1
+
+                return data
+
+            insert_race_condition.count = 0
+
+            monkeypatch.setattr(experiment_builder, 'fetch_config_from_db', insert_race_condition)
+
+            experiment_builder.build(**new_config)
+
+            assert experiment_count_before == count_experiments()
+
+        # Should be called
+        # - once in build(),
+        #     -> then register fails,
+        # - then called once again in build,
+        # - then called in build_view to evaluate the conflicts
+        assert insert_race_condition.count == 3
+
+    def test_algorithm_config_with_just_a_string(self):
+        """Test that configuring an algorithm with just a string is OK."""
+        name = 'supernaedo3'
+        space = {'x': 'uniform(0,10)'}
+        algorithms = 'dumbalgo'
+
+        with OrionState(experiments=[], trials=[]):
+            exp = experiment_builder.build(name=name, space=space, algorithms=algorithms)
+
+        assert exp.configuration['algorithms'] == {
+            'dumbalgo': {
+                'done': False,
+                'judgement': None,
+                'scoring': 0,
+                'suspend': False,
+                'value': 5,
+                'seed': None}}
+
+    def test_new_child_with_branch(self):
+        """Check that experiment is not incremented when branching with a new name."""
+        name = 'parent'
+        space = {'x': 'uniform(0, 10)'}
+
+        with OrionState(experiments=[], trials=[]):
+            parent = experiment_builder.build(name=name, space=space)
+
+            assert parent.name == name
+            assert parent.version == 1
+
+            child_name = 'child'
+
+            child = experiment_builder.build(name=name, branching={'branch_to': child_name})
+
+            assert child.name == child_name
+            assert child.version == 1
+            assert child.refers['parent_id'] == parent.id
+
+            child_name = 'child2'
+
+            child = experiment_builder.build(name=child_name, branching={'branch_from': name})
+
+            assert child.name == child_name
+            assert child.version == 1
+            assert child.refers['parent_id'] == parent.id
+
+    def test_no_increment_when_child_exist(self):
+        """Check that experiment cannot be incremented when asked for v1 while v2 exists."""
+        name = 'parent'
+        space = {'x': 'uniform(0,10)'}
+
+        with OrionState(experiments=[], trials=[]):
+            parent = experiment_builder.build(name=name, space=space)
+            child = experiment_builder.build(name=name, space={'x': 'loguniform(1,10)'})
+            assert child.name == parent.name
+            assert parent.version == 1
+            assert child.version == 2
+
+            with pytest.raises(BranchingEvent) as exc_info:
+                experiment_builder.build(name=name, version=1, space={'x': 'loguniform(1,10)'})
+            assert 'Configuration is different and generates a branching' in str(exc_info.value)
+
+    def test_race_condition_wout_version(self, monkeypatch):
+        """Test that an experiment losing the race condition during version increment raises
+        RaceCondition if version number was not specified.
+        """
+        name = 'parent'
+        space = {'x': 'uniform(0,10)'}
+
+        with OrionState(experiments=[], trials=[]):
+            parent = experiment_builder.build(name, space=space)
+            child = experiment_builder.build(name=name, space={'x': 'loguniform(1,10)'})
+            assert child.name == parent.name
+            assert parent.version == 1
+            assert child.version == 2
+
+            # Either
+            # 1.
+            #     fetch_config_from_db only fetch parent
+            #     test_version finds other child
+            #     -> Detect race condition looking at conflicts
+            # 2.
+            #     fetch_config_from_db only fetch parent
+            #     test_version do not find other child
+            #     -> DuplicateKeyError
+
+            def insert_race_condition_1(self, query):
+                is_auto_version_query = (query == {'name': name, 'refers.parent_id': parent.id})
+                if is_auto_version_query:
+                    data = [child.configuration]
+                # First time the query returns no other child
+                elif insert_race_condition_1.count < 1:
+                    data = [parent.configuration]
+                else:
+                    data = [parent.configuration, child.configuration]
+
+                insert_race_condition_1.count += int(is_auto_version_query)
+
+                return data
+
+            insert_race_condition_1.count = 0
+
+            monkeypatch.setattr(get_storage().__class__, 'fetch_experiments',
+                                insert_race_condition_1)
+
+            with pytest.raises(RaceCondition) as exc_info:
+                experiment_builder.build(name=name, space={'x': 'loguniform(1,10)'})
+            assert 'There was likely a race condition during version' in str(exc_info.value)
+
+            def insert_race_condition_2(self, query):
+                is_auto_version_query = (query == {'name': name, 'refers.parent_id': parent.id})
+                # First time the query returns no other child
+                if is_auto_version_query:
+                    data = []
+                elif insert_race_condition_2.count < 1:
+                    data = [parent.configuration]
+                else:
+                    data = [parent.configuration, child.configuration]
+
+                insert_race_condition_2.count += int(is_auto_version_query)
+
+                return data
+
+            insert_race_condition_2.count = 0
+
+            monkeypatch.setattr(get_storage().__class__, 'fetch_experiments',
+                                insert_race_condition_2)
+
+            with pytest.raises(RaceCondition) as exc_info:
+                experiment_builder.build(name=name, space={'x': 'loguniform(1,10)'})
+            assert 'There was a race condition during branching.' in str(exc_info.value)
+
+    def test_race_condition_w_version(self, monkeypatch):
+        """Test that an experiment losing the race condition during version increment cannot
+        be resolved automatically if a version number was specified.
+
+        Note that if we raised RaceCondition, the conflict would still occur since
+        the version number fetched would not be the new one from the resolution but the requested
+        one. Therefore raising and handling RaceCondition would lead to infinite recursion in
+        the experiment builder.
+        """
+        name = 'parent'
+        space = {'x': 'uniform(0,10)'}
+
+        with OrionState(experiments=[], trials=[]):
+            parent = experiment_builder.build(name, space=space)
+            child = experiment_builder.build(name=name, space={'x': 'loguniform(1,10)'})
+            assert child.name == parent.name
+            assert parent.version == 1
+            assert child.version == 2
+
+            # Either
+            # 1.
+            #     fetch_config_from_db only fetch parent
+            #     test_version finds other child
+            #     -> Detect race condition looking at conflicts
+            # 2.
+ # fetch_config_from_db only fetch parent + # test_version do not find other child + # -> DuplicateKeyError + + def insert_race_condition_1(self, query): + is_auto_version_query = (query == {'name': name, 'refers.parent_id': parent.id}) + if is_auto_version_query: + data = [child.configuration] + # First time the query returns no other child + elif insert_race_condition_1.count < 1: + data = [parent.configuration] + else: + data = [parent.configuration, child.configuration] + + insert_race_condition_1.count += int(is_auto_version_query) + + return data + + insert_race_condition_1.count = 0 + + monkeypatch.setattr(get_storage().__class__, 'fetch_experiments', + insert_race_condition_1) + + with pytest.raises(BranchingEvent) as exc_info: + experiment_builder.build(name=name, version=1, space={'x': 'loguniform(1,10)'}) + assert 'Configuration is different and generates' in str(exc_info.value) + + def insert_race_condition_2(self, query): + is_auto_version_query = (query == {'name': name, 'refers.parent_id': parent.id}) + # First time the query returns no other child + if is_auto_version_query: + data = [] + elif insert_race_condition_2.count < 1: + data = [parent.configuration] + else: + data = [parent.configuration, child.configuration] + + insert_race_condition_2.count += int(is_auto_version_query) + + return data + + insert_race_condition_2.count = 0 + + monkeypatch.setattr(get_storage().__class__, 'fetch_experiments', + insert_race_condition_2) + + with pytest.raises(RaceCondition) as exc_info: + experiment_builder.build(name=name, version=1, space={'x': 'loguniform(1,10)'}) + assert 'There was a race condition during branching.' in str(exc_info.value) + + +class TestInitExperimentView(object): + """Create new ExperimentView instance.""" + + def test_empty_experiment_view(self): + """Hit user name, but exp_name does not hit the db.""" + with OrionState(experiments=[], trials=[]): + with pytest.raises(ValueError) as exc_info: + experiment_builder.build_view('supernaekei') + assert ("No experiment with given name 'supernaekei' and version '*'" + in str(exc_info.value)) + + def test_existing_experiment_view(self, new_config): + """Hit exp_name + user's name in the db, fetch most recent entry.""" + with OrionState(experiments=[new_config], trials=[]): + exp = experiment_builder.build_view(name='supernaekei') + + assert exp._id == new_config['_id'] + assert exp.name == new_config['name'] + assert exp.configuration['refers'] == new_config['refers'] + assert exp.metadata == new_config['metadata'] + assert exp.pool_size == new_config['pool_size'] + assert exp.max_trials == new_config['max_trials'] + assert exp.version == new_config['version'] + assert isinstance(exp.refers['adapter'], BaseAdapter) + assert exp.algorithms.configuration == new_config['algorithms'] + + with pytest.raises(AttributeError): + exp.this_is_not_in_config = 5 + + # Test that experiment.update_completed_trial indeed exists + exp._experiment.update_completed_trial + with pytest.raises(AttributeError): + exp.update_completed_trial + + with pytest.raises(AttributeError): + exp.register_trial + + with pytest.raises(AttributeError): + exp.reserve_trial diff --git a/tests/unittests/core/io/test_orion_cmdline_parser.py b/tests/unittests/core/io/test_orion_cmdline_parser.py index 529ab1359..58564c516 100644 --- a/tests/unittests/core/io/test_orion_cmdline_parser.py +++ b/tests/unittests/core/io/test_orion_cmdline_parser.py @@ -10,13 +10,13 @@ @pytest.fixture def parser(): """Return an instance of `OrionCmdlineParser`.""" - return 
OrionCmdlineParser() + return OrionCmdlineParser(allow_non_existing_user_script=True) @pytest.fixture def parser_diff_prefix(): """Return an instance of `OrionCmdlineParser` with a different config prefix.""" - return OrionCmdlineParser(config_prefix='config2') + return OrionCmdlineParser(config_prefix='config2', allow_non_existing_user_script=True) @pytest.fixture @@ -91,11 +91,11 @@ def test_parse_from_unknown_config(parser, some_sample_config): def test_parse_equivalency(yaml_config, json_config): """Templates found from json and yaml are the same.""" - parser_yaml = OrionCmdlineParser() + parser_yaml = OrionCmdlineParser(allow_non_existing_user_script=True) parser_yaml.parse(yaml_config) dict_from_yaml = parser_yaml.config_file_data - parser_json = OrionCmdlineParser() + parser_json = OrionCmdlineParser(allow_non_existing_user_script=True) parser_json.parse(json_config) dict_from_json = parser_json.config_file_data assert dict_from_json == dict_from_yaml @@ -230,7 +230,6 @@ def test_format_with_properties(parser, cmd_with_properties, hacked_exp): {'name': '/prior', 'type': 'categorical', 'value': 'sgd'}]) cmd_line = parser.format(None, trial=trial, experiment=hacked_exp) - print(cmd_line) assert trial.hash_name in cmd_line assert 'supernaedo2-dendi' in cmd_line @@ -250,12 +249,11 @@ def test_configurable_config_arg(parser_diff_prefix, yaml_sample_path): assert '/something-same' in config -def test_get_state_dict_before_parse(commandline): +def test_get_state_dict_before_parse(parser, commandline): """Test getting state dict.""" - parser = OrionCmdlineParser() - assert parser.get_state_dict() == { 'parser': { + 'keys': [], 'arguments': [], 'template': []}, 'cmd_priors': list(map(list, parser.cmd_priors.items())), @@ -266,10 +264,8 @@ def test_get_state_dict_before_parse(commandline): 'converter': None} -def test_get_state_dict_after_parse_no_config_file(commandline): +def test_get_state_dict_after_parse_no_config_file(parser, commandline): """Test getting state dict.""" - parser = OrionCmdlineParser() - parser.parse(commandline) assert parser.get_state_dict() == { @@ -282,10 +278,8 @@ def test_get_state_dict_after_parse_no_config_file(commandline): 'converter': None} -def test_get_state_dict_after_parse_with_config_file(yaml_config, commandline): +def test_get_state_dict_after_parse_with_config_file(parser, yaml_config, commandline): """Test getting state dict.""" - parser = OrionCmdlineParser() - cmd_args = yaml_config cmd_args.extend(commandline) @@ -311,7 +305,7 @@ def test_set_state_dict(parser, commandline, json_config, tmpdir, json_converter state = parser.get_state_dict() parser = None - blank_parser = OrionCmdlineParser() + blank_parser = OrionCmdlineParser(allow_non_existing_user_script=True) blank_parser.set_state_dict(state) diff --git a/tests/unittests/core/io/test_resolve_config.py b/tests/unittests/core/io/test_resolve_config.py index 5e2d6e96c..fbf17d8a3 100644 --- a/tests/unittests/core/io/test_resolve_config.py +++ b/tests/unittests/core/io/test_resolve_config.py @@ -76,30 +76,34 @@ def test_fetch_env_vars(): @pytest.mark.usefixtures("version_XYZ") def test_fetch_metadata_orion_version(): """Verify orion version""" - metadata = resolve_config.fetch_metadata({}) + metadata = resolve_config.fetch_metadata() assert metadata['orion_version'] == 'XYZ' def test_fetch_metadata_executable_users_script(script_path): """Verify executable user script with absolute path""" - cmdargs = {'user_args': [script_path]} - metadata = resolve_config.fetch_metadata(cmdargs) + metadata = 
resolve_config.fetch_metadata(user_args=[script_path])
     assert metadata['user_script'] == os.path.abspath(script_path)


 def test_fetch_metadata_non_executable_users_script():
     """Verify executable user script keeps given path"""
-    cmdargs = {'user_args': ['tests/functional/demo/orion_config.yaml']}
-    metadata = resolve_config.fetch_metadata(cmdargs)
-    assert metadata['user_script'] == 'tests/functional/demo/orion_config.yaml'
+    script_path = 'tests/functional/demo/orion_config.yaml'
+    metadata = resolve_config.fetch_metadata(user_args=[script_path])
+    assert metadata['user_script'] == script_path
+
+
+def test_fetch_metadata_python_users_script(script_path):
+    """Verify user script is correctly inferred when called with python"""
+    metadata = resolve_config.fetch_metadata(user_args=['python', script_path, 'some', 'args'])
+    assert metadata['user_script'] == script_path


 def test_fetch_metadata_not_existed_path():
     """Verfiy the raise of error when user_script path does not exist"""
     path = 'dummy/path'
-    cmdargs = {'user_args': [path]}
     with pytest.raises(OSError) as exc_info:
-        resolve_config.fetch_metadata(cmdargs)
+        resolve_config.fetch_metadata(user_args=[path])
     assert "The path specified for the script does not exist" in str(exc_info.value)


@@ -107,25 +111,120 @@ def test_fetch_metadata_not_existed_path():
 def test_fetch_metadata_user_args(script_path):
     """Verify user args"""
     user_args = [os.path.abspath(script_path)] + list(map(str, range(10)))
-    cmdargs = {'user_args': user_args}
-    metadata = resolve_config.fetch_metadata(cmdargs)
+    metadata = resolve_config.fetch_metadata(user_args=user_args)
     assert metadata['user_script'] == user_args[0]
-    assert metadata['user_args'] == user_args[1:]
+    assert metadata['user_args'] == user_args


 @pytest.mark.usefixtures("with_user_tsirif")
 def test_fetch_metadata_user_tsirif():
     """Verify user name"""
-    metadata = resolve_config.fetch_metadata({})
+    metadata = resolve_config.fetch_metadata()
     assert metadata['user'] == "tsirif"


 def test_fetch_metadata():
     """Verify no additional data is stored in metadata"""
-    metadata = resolve_config.fetch_metadata({})
+    metadata = resolve_config.fetch_metadata()
     len(metadata) == 4


+def test_fetch_config_from_cmdargs():
+    """Verify fetch_config_from_cmdargs maps each command line argument to its config section"""
+    cmdargs = {
+        'name': 'test',
+        'user': 'me',
+        'version': 1,
+        'config': None,
+        'exp_max_trials': 'exp_max_trials',
+        'worker_trials': 'worker_trials',
+        'exp_max_broken': 'exp_max_broken',
+        'working_dir': 'working_dir',
+        'pool_size': 'pool_size',
+        'max_trials': 'max_trials',
+        'heartbeat': 'heartbeat',
+        'worker_max_trials': 'worker_max_trials',
+        'worker_max_broken': 'worker_max_broken',
+        'max_idle_time': 'max_idle_time',
+        'interrupt_signal_code': 'interrupt_signal_code',
+        'user_script_config': 'user_script_config',
+        'manual_resolution': 'manual_resolution',
+        'non_monitored_arguments': 'non_monitored_arguments',
+        'ignore_code_changes': 'ignore_code_changes',
+        'auto_resolution': 'auto_resolution',
+        'branch_from': 'branch_from',
+        'algorithm_change': 'algorithm_change',
+        'code_change_type': 'code_change_type',
+        'cli_change_type': 'cli_change_type',
+        'branch_to': 'branch_to',
+        'config_change_type': 'config_change_type'}
+
+    config = resolve_config.fetch_config_from_cmdargs(cmdargs)
+
+    assert config.pop('config', None) is None
+
+    exp_config = config.pop('experiment')
+    assert exp_config.pop('name') == 'test'
+    assert exp_config.pop('version') == 1
+    assert exp_config.pop('user') == 'me'
+    assert 
exp_config.pop('max_trials') == 'exp_max_trials' + assert exp_config.pop('max_broken') == 'exp_max_broken' + assert exp_config.pop('working_dir') == 'working_dir' + assert exp_config.pop('pool_size') == 'pool_size' + + assert exp_config == {} + + worker_config = config.pop('worker') + assert worker_config.pop('heartbeat') == 'heartbeat' + assert worker_config.pop('max_trials') == 'worker_max_trials' + assert worker_config.pop('max_broken') == 'worker_max_broken' + assert worker_config.pop('max_idle_time') == 'max_idle_time' + assert worker_config.pop('interrupt_signal_code') == 'interrupt_signal_code' + assert worker_config.pop('user_script_config') == 'user_script_config' + + assert worker_config == {} + + evc_config = config.pop('evc') + assert evc_config.pop('manual_resolution') == 'manual_resolution' + assert evc_config.pop('non_monitored_arguments') == 'non_monitored_arguments' + assert evc_config.pop('ignore_code_changes') == 'ignore_code_changes' + assert evc_config.pop('auto_resolution') == 'auto_resolution' + assert evc_config.pop('branch_from') == 'branch_from' + assert evc_config.pop('algorithm_change') == 'algorithm_change' + assert evc_config.pop('code_change_type') == 'code_change_type' + assert evc_config.pop('cli_change_type') == 'cli_change_type' + assert evc_config.pop('branch_to') == 'branch_to' + assert evc_config.pop('config_change_type') == 'config_change_type' + + assert evc_config == {} + + assert config == {} + + +@pytest.mark.parametrize( + "argument", + ['config', 'user', 'user_args', 'name', 'version', 'branch_from', 'branch_to']) +def test_fetch_config_from_cmdargs_no_empty(argument): + """Verify fetch_config returns only defined arguments.""" + config = resolve_config.fetch_config_from_cmdargs({}) + assert config == {} + + config = resolve_config.fetch_config_from_cmdargs({argument: None}) + assert config == {} + + config = resolve_config.fetch_config_from_cmdargs({argument: False}) + assert config == {} + + config = resolve_config.fetch_config_from_cmdargs({argument: 1}) + + if argument in ['name', 'user', 'version']: + assert config == {'experiment': {argument: 1}} + elif argument in ['branch_from', 'branch_to']: + assert config == {'evc': {argument: 1}} + else: + assert config == {argument: 1} + + def test_fetch_config_no_hit(): """Verify fetch_config returns empty dict on no config file path""" config = resolve_config.fetch_config({"config": ""}) @@ -136,14 +235,139 @@ def test_fetch_config(config_file): """Verify fetch_config returns valid dictionnary""" config = resolve_config.fetch_config({"config": config_file}) - assert config['algorithms'] == 'random' - assert config['database']['host'] == 'mongodb://user:pass@localhost' - assert config['database']['name'] == 'orion_test' - assert config['database']['type'] == 'mongodb' + assert config.pop('storage') == { + 'database': { + 'host': 'mongodb://user:pass@localhost', + 'name': 'orion_test', + 'type': 'mongodb'}} + + assert config.pop('experiment') == { + 'max_trials': 100, + 'name': 'voila_voici', + 'pool_size': 1, + 'algorithms': 'random', + 'strategy': 'NoParallelStrategy'} + + assert config == {} + + +def test_fetch_config_global_local_coherence(monkeypatch, config_file): + """Verify fetch_config parses local config according to global config structure.""" + def mocked_config(file_object): + return orion.core.config.to_dict() + monkeypatch.setattr('yaml.safe_load', mocked_config) + + config = resolve_config.fetch_config({"config": config_file}) + + # Test storage subconfig + storage_config = 
config.pop('storage') + database_config = storage_config.pop('database') + assert storage_config.pop('type') == orion.core.config.storage.type + assert storage_config == {} + + assert database_config.pop('host') == orion.core.config.storage.database.host + assert database_config.pop('name') == orion.core.config.storage.database.name + assert database_config.pop('port') == orion.core.config.storage.database.port + assert database_config.pop('type') == orion.core.config.storage.database.type + + assert database_config == {} + + # Test experiment subconfig + exp_config = config.pop('experiment') + assert exp_config.pop('max_trials') == orion.core.config.experiment.max_trials + assert exp_config.pop('max_broken') == orion.core.config.experiment.max_broken + assert exp_config.pop('working_dir') == orion.core.config.experiment.working_dir + assert exp_config.pop('pool_size') == orion.core.config.experiment.pool_size + assert exp_config.pop('algorithms') == orion.core.config.experiment.algorithms + assert exp_config.pop('strategy') == orion.core.config.experiment.strategy + + assert exp_config == {} + + # Test worker subconfig + worker_config = config.pop('worker') + assert worker_config.pop('heartbeat') == orion.core.config.worker.heartbeat + assert worker_config.pop('max_trials') == orion.core.config.worker.max_trials + assert worker_config.pop('max_broken') == orion.core.config.worker.max_broken + assert worker_config.pop('max_idle_time') == orion.core.config.worker.max_idle_time + assert (worker_config.pop('interrupt_signal_code') == + orion.core.config.worker.interrupt_signal_code) + assert (worker_config.pop('user_script_config') == + orion.core.config.worker.user_script_config) + + assert worker_config == {} + + # Test evc subconfig + evc_config = config.pop('evc') + assert evc_config.pop('auto_resolution') == orion.core.config.evc.auto_resolution + assert evc_config.pop('manual_resolution') == orion.core.config.evc.manual_resolution + assert (evc_config.pop('non_monitored_arguments') == + orion.core.config.evc.non_monitored_arguments) + assert evc_config.pop('ignore_code_changes') == orion.core.config.evc.ignore_code_changes + assert evc_config.pop('algorithm_change') == orion.core.config.evc.algorithm_change + assert evc_config.pop('code_change_type') == orion.core.config.evc.code_change_type + assert evc_config.pop('cli_change_type') == orion.core.config.evc.cli_change_type + assert evc_config.pop('config_change_type') == orion.core.config.evc.config_change_type + + assert evc_config == {} + + # Confirm that all fields were tested. 
+ assert config == {} + + +def test_fetch_config_dash(monkeypatch, config_file): + """Verify fetch_config supports dash.""" + def mocked_config(file_object): + return {'experiment': {'max-broken': 10, 'algorithms': {'dont-change': 'me'}}} + monkeypatch.setattr('yaml.safe_load', mocked_config) + + config = resolve_config.fetch_config({"config": config_file}) + + assert config == {'experiment': {'max_broken': 10, 'algorithms': {'dont-change': 'me'}}} + + +def test_fetch_config_underscore(monkeypatch, config_file): + """Verify fetch_config supports underscore as well.""" + def mocked_config(file_object): + return {'experiment': {'max_broken': 10, 'algorithms': {'dont-change': 'me'}}} + monkeypatch.setattr('yaml.safe_load', mocked_config) + + config = resolve_config.fetch_config({"config": config_file}) + + assert config == {'experiment': {'max_broken': 10, 'algorithms': {'dont-change': 'me'}}} + + +def test_fetch_config_deprecated_max_trials(monkeypatch, config_file): + """Verify fetch_config will overwrite deprecated value if also properly defined.""" + def mocked_config(file_object): + return {'experiment': {'max_trials': 10}, 'max_trials': 20} + monkeypatch.setattr('yaml.safe_load', mocked_config) + + config = resolve_config.fetch_config({"config": config_file}) + + assert config == {'experiment': {'max_trials': 10}} + + +def test_fetch_config_deprecate_tricky_names(monkeypatch, config_file): + """Verify fetch_config assigns values properly for the similar names.""" + def mocked_config(file_object): + return { + 'experiment': { + 'worker_trials': 'should_be_ignored'}, + 'max_trials': 'exp_max_trials', + 'max_broken': 'exp_max_broken', + 'worker_trials': 'worker_max_trials', + 'name': 'exp_name' + } + monkeypatch.setattr('yaml.safe_load', mocked_config) + + config = resolve_config.fetch_config({"config": config_file}) - assert config['max_trials'] == 100 - assert config['name'] == 'voila_voici' - assert config['pool_size'] == 1 + assert config == { + 'experiment': { + 'name': 'exp_name', + 'max_trials': 'exp_max_trials', + 'max_broken': 'exp_max_broken'}, + 'worker': {'max_trials': 'worker_max_trials'}} def test_merge_configs_update_two(): diff --git a/tests/unittests/core/io/test_space_builder.py b/tests/unittests/core/io/test_space_builder.py index 4d5bf4071..0bac4d59f 100644 --- a/tests/unittests/core/io/test_space_builder.py +++ b/tests/unittests/core/io/test_space_builder.py @@ -203,10 +203,21 @@ def test_build_fails_because_troll(self, dimbuilder): class TestSpaceBuilder(object): """Check whether space definition from various input format is successful.""" - def test_build_with_nothing(self, spacebuilder): - """Return an empty space if nothing is provided.""" - space = spacebuilder.build_from([]) - assert not space - - space = spacebuilder.build_from(["--seed=555", "--naedw"]) - assert not space + def test_configuration_rebuild(self, spacebuilder): + """Test that configuration can be used to recreate a space.""" + prior = {'x': 'uniform(0, 10, discrete=True)', + 'y': 'loguniform(1e-08, 1)', + 'z': 'choices([\'voici\', \'voila\', 2])'} + space = spacebuilder.build(prior) + assert space.configuration == prior + + def test_subdict_dimensions(self, spacebuilder): + """Test space can have hierarchical structure.""" + prior = {'a': {'x': 'uniform(0, 10, discrete=True)'}, + 'b': {'y': 'loguniform(1e-08, 1)', + 'z': 'choices([\'voici\', \'voila\', 2])'}} + space = spacebuilder.build(prior) + assert len(space) == 3 + assert 'a.x' in space + assert 'b.y' in space + assert 'b.z' in space diff 
--git a/tests/unittests/core/sample_config.json b/tests/unittests/core/sample_config.json index 1abb3f977..276d884b2 100644 --- a/tests/unittests/core/sample_config.json +++ b/tests/unittests/core/sample_config.json @@ -1,7 +1,7 @@ { "yo": 5, "training": { - "lr0": "orion~loguniform(0.0001, 0.3)", + "lr0": "orion~loguniform(0.0001, 0.3, precision=None)", "mbs": "orion~uniform(32, 256, discrete=True)" }, "layers": [ diff --git a/tests/unittests/core/sample_config.yml b/tests/unittests/core/sample_config.yml index d4f1149ca..4626e81f7 100644 --- a/tests/unittests/core/sample_config.yml +++ b/tests/unittests/core/sample_config.yml @@ -1,6 +1,6 @@ yo: 5 training: - lr0: orion~loguniform(0.0001, 0.3) + lr0: orion~loguniform(0.0001, 0.3, precision=None) mbs: orion~uniform(32, 256, discrete=True) # some comments diff --git a/tests/unittests/core/sample_config_diff.yml b/tests/unittests/core/sample_config_diff.yml index 6f01059e7..c60e8e832 100644 --- a/tests/unittests/core/sample_config_diff.yml +++ b/tests/unittests/core/sample_config_diff.yml @@ -1,6 +1,6 @@ yo: 5 training: - lr0: orion~loguniform(0.0001, 0.3) + lr0: orion~loguniform(0.0001, 0.3, precision=None) mbs: orion~uniform(32, 256, discrete=True) # some comments diff --git a/tests/unittests/core/test_branch_config.py b/tests/unittests/core/test_branch_config.py index b995bbec5..4de1fa74d 100644 --- a/tests/unittests/core/test_branch_config.py +++ b/tests/unittests/core/test_branch_config.py @@ -8,6 +8,7 @@ import pytest import yaml +import orion.core from orion.core import evc from orion.core.evc.conflicts import ( AlgorithmConflict, ChangedDimensionConflict, CodeConflict, CommandLineConflict, @@ -41,6 +42,7 @@ def user_config(): @pytest.fixture def parent_config(user_config): """Create a configuration that will not hit the database.""" + user_script = 'abs_path/black_box.py' config = dict( _id='test', name='test', @@ -52,10 +54,10 @@ def parent_config(user_config): "active_branch": None, "diff_sha": "diff", }, - 'user_script': 'abs_path/black_box.py', - 'user_args': - ['--nameless=option', '-x~uniform(0,1)', '-y~normal(0,1)', '-z~uniform(0,10)', - '--manual-resolution'], + 'user_script': user_script, + 'user_args': [ + user_script, '--nameless=option', '-x~uniform(0,1)', '-y~normal(0,1)', + '-z~uniform(0,10)', '--manual-resolution'], 'user': 'some_user_name'}, refers={}) @@ -66,7 +68,7 @@ def parent_config(user_config): config['metadata']['user_args'].append('--config=%s' % config_file_path) - backward.populate_priors(config['metadata']) + backward.populate_space(config) yield config os.remove(config_file_path) @@ -85,9 +87,9 @@ def child_config(parent_config, create_db_instance): @pytest.fixture def missing_config(child_config): """Create a child config with a missing dimension""" - del(child_config['metadata']['user_args'][1]) # del -x - del(child_config['metadata']['user_args'][1]) # del -y - backward.populate_priors(child_config['metadata']) + del(child_config['metadata']['user_args'][2]) # del -x + del(child_config['metadata']['user_args'][2]) # del -y + backward.populate_space(child_config) return child_config @@ -95,17 +97,17 @@ def missing_config(child_config): def new_config(child_config): """Create a child config with a new dimension""" child_config['metadata']['user_args'].append('-w_d~normal(0,1)') - backward.populate_priors(child_config['metadata']) + backward.populate_space(child_config) return child_config @pytest.fixture def changed_config(child_config): """Create a child config with a changed dimension""" - 
second_element = child_config['metadata']['user_args'][2] + second_element = child_config['metadata']['user_args'][3] second_element = second_element.replace('normal', 'uniform') - child_config['metadata']['user_args'][2] = second_element - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][3] = second_element + backward.populate_space(child_config) return child_config @@ -130,7 +132,7 @@ def same_userconfig_config(user_config, child_config): with open(config_file_path, 'w') as f: yaml.dump(user_config, f) child_config['metadata']['user_args'][-1] = '--config=%s' % config_file_path - backward.populate_priors(child_config['metadata']) + backward.populate_space(child_config) yield child_config os.remove(config_file_path) @@ -139,12 +141,12 @@ def changed_userconfig_config(user_config, child_config): """Create a child config with a changed dimension""" config_file_path = './changed_config.yaml' - user_config['b'] = 'orion~uniform(-20, 0)' + user_config['b'] = 'orion~uniform(-20, 0, precision=None)' user_config['some_other'] = 'hello' with open(config_file_path, 'w') as f: yaml.dump(user_config, f) child_config['metadata']['user_args'][-1] = '--config=%s' % config_file_path - backward.populate_priors(child_config['metadata']) + backward.populate_space(child_config) yield child_config os.remove(config_file_path) @@ -153,7 +155,7 @@ def changed_cli_config(child_config): """Create a child config with a changed dimension""" child_config['metadata']['user_args'] += ['-u=0', '--another=test', 'positional'] - backward.populate_priors(child_config['metadata']) + backward.populate_space(child_config) return child_config @@ -164,24 +166,25 @@ def list_arg_with_equals_cli_config(child_config): """ child_config['metadata']['user_args'] += ['--args=1', '--args=2', '--args=3'] - backward.populate_priors(child_config['metadata']) + backward.populate_space(child_config) return child_config @pytest.fixture def cl_config(create_db_instance): """Create a child config with markers for commandline solving""" + user_script = 'abs_path/black_box.py' config = dict( name='test', branch='test2', algorithms='random', metadata={'hash_commit': 'old', - 'user_script': 'abs_path/black_box.py', - 'user_args': - ['--nameless=option', '-x~>w_d', '-w_d~+normal(0,1)', '-y~+uniform(0,1)', '-z~-', - '--omega~+normal(0,1)'], + 'user_script': user_script, + 'user_args': [ + user_script, '--nameless=option', '-x~>w_d', '-w_d~+normal(0,1)', + '-y~+uniform(0,1)', '-z~-', '--omega~+normal(0,1)'], 'user': 'some_user_name'}) - backward.populate_priors(config['metadata']) + backward.populate_space(config) return config @@ -280,6 +283,13 @@ def test_code_conflict(self, parent_config, changed_code_config): assert conflict.new_config['metadata']['VCS']['HEAD_sha'] == 'new_test' assert isinstance(conflict, CodeConflict) + def test_ignore_code_conflict(self, parent_config, changed_code_config): + """Test that an ignored code commit hash change is not detected as a conflict""" + conflicts = detect_conflicts(parent_config, changed_code_config, + {'ignore_code_changes': True}) + + assert len(conflicts.get()) == 1 + def test_config_new_name_no_conflict(self, parent_config, same_userconfig_config): """Test if same configuration file with a different name is not detected as a conflict""" conflicts = detect_conflicts(parent_config, same_userconfig_config) @@ -301,7 +311,6 @@ def
test_config_non_dim_conflict(self, parent_config, changed_userconfig_config) assert not conflicts.get([ExperimentNameConflict])[0].is_resolved assert not conflicts.get([ScriptConfigConflict])[0].is_resolved - @pytest.mark.skip(reason='Args defined with \'=\' are not supported currently.') def test_cli_conflict(self, parent_config, changed_cli_config): """Test if changed command line call is detected as a conflict""" conflicts = detect_conflicts(parent_config, changed_cli_config) @@ -311,16 +320,26 @@ def test_cli_conflict(self, parent_config, changed_cli_config): assert not conflicts.get([ExperimentNameConflict])[0].is_resolved assert not conflicts.get([CommandLineConflict])[0].is_resolved + def test_cli_ignored_conflict(self, parent_config, changed_cli_config): + """Test that an ignored command line change is not detected as a conflict""" + changed_cli_config['metadata']['user_args'].pop() + conflicts = detect_conflicts(parent_config, changed_cli_config, + {'non_monitored_arguments': ['u', 'another']}) + + assert len(conflicts.get()) == 1 + + assert not conflicts.get([ExperimentNameConflict])[0].is_resolved + class TestResolutions(object): """Test resolution of conflicts""" def test_add_single_hit(self, parent_config, new_config): """Test if adding a dimension only touches the correct status""" - del new_config['metadata']['user_args'][1] - backward.populate_priors(new_config['metadata']) + del new_config['metadata']['user_args'][2] + backward.populate_space(new_config) conflicts = detect_conflicts(parent_config, new_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) branch_builder.add_dimension('w_d') assert len(conflicts.get()) == 3 @@ -331,7 +350,7 @@ def test_add_single_hit(self, parent_config, new_config): def test_add_new(self, parent_config, new_config): """Test if adding a new dimension solves the conflict""" conflicts = detect_conflicts(parent_config, new_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) branch_builder.add_dimension('w_d') assert len(conflicts.get()) == 2 @@ -345,7 +364,7 @@ def test_add_new(self, parent_config, new_config): def test_add_changed(self, parent_config, changed_config): """Test if adding a changed dimension solves the conflict""" conflicts = detect_conflicts(parent_config, changed_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) branch_builder.add_dimension('y') assert len(conflicts.get()) == 2 @@ -359,7 +378,7 @@ def test_add_changed(self, parent_config, changed_config): def test_remove_missing(self, parent_config, missing_config): """Test if removing a missing dimension solves the conflict""" conflicts = detect_conflicts(parent_config, missing_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) branch_builder.remove_dimension('x') assert len(conflicts.get()) == 3 @@ -373,9 +392,9 @@ def test_remove_missing(self, parent_config, missing_config): def test_rename_missing(self, parent_config, missing_config): """Test if renaming a dimension to another solves both conflicts""" missing_config['metadata']['user_args'].append('-w_d~uniform(0,1)') -
backward.populate_priors(missing_config['metadata']) + backward.populate_space(missing_config) conflicts = detect_conflicts(parent_config, missing_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) branch_builder.rename_dimension('x', 'w_d') assert len(conflicts.get()) == 4 @@ -398,9 +417,9 @@ def test_rename_missing_changed(self, parent_config, missing_config): creates a new one which is not solved """ missing_config['metadata']['user_args'].append('-w_d~normal(0,1)') - backward.populate_priors(missing_config['metadata']) + backward.populate_space(missing_config) conflicts = detect_conflicts(parent_config, missing_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 4 @@ -425,7 +444,7 @@ def test_rename_missing_changed(self, parent_config, missing_config): def test_reset_dimension(self, parent_config, new_config): """Test if resetting a dimension unsolves the conflict""" conflicts = detect_conflicts(parent_config, new_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) branch_builder.add_dimension('w_d') assert len(conflicts.get_resolved()) == 2 @@ -447,12 +466,12 @@ def test_reset_dimension(self, parent_config, new_config): def test_name_experiment(self, bad_exp_parent_config, bad_exp_child_config, create_db_instance): """Test if having the same experiment name does not create a conflict.""" - backward.populate_priors(bad_exp_parent_config['metadata']) - backward.populate_priors(bad_exp_child_config['metadata']) + backward.populate_space(bad_exp_parent_config) + backward.populate_space(bad_exp_child_config) create_db_instance.write('experiments', bad_exp_parent_config) create_db_instance.write('experiments', bad_exp_child_config) conflicts = detect_conflicts(bad_exp_parent_config, bad_exp_parent_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 1 assert len(conflicts.get_resolved()) == 0 @@ -482,7 +501,7 @@ def _versions(self, *args, **kwargs): "_check_for_greater_versions", _versions) conflicts = detect_conflicts(parent_config, child_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 1 assert len(conflicts.get_resolved()) == 0 @@ -498,7 +517,7 @@ def _versions(self, *args, **kwargs): def test_algo_change(self, parent_config, changed_algo_config): """Test if setting the algorithm conflict solves it""" conflicts = detect_conflicts(parent_config, changed_algo_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 1 @@ -516,7 +535,7 @@ def test_algo_change(self, parent_config, changed_algo_config): def test_code_change(self, parent_config, changed_code_config): """Test if giving a proper change-type solves the code conflict""" conflicts = detect_conflicts(parent_config, changed_code_config) - branch_builder = 
ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 1 @@ -533,7 +552,7 @@ def test_code_change(self, parent_config, changed_code_config): def test_bad_code_change(self, capsys, parent_config, changed_code_config): """Test if giving an invalid change-type prints error message and do nothing""" conflicts = detect_conflicts(parent_config, changed_code_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) capsys.readouterr() branch_builder.set_code_change_type('bad-type') out, err = capsys.readouterr() @@ -545,7 +564,7 @@ def test_bad_code_change(self, capsys, parent_config, changed_code_config): def test_config_change(self, parent_config, changed_userconfig_config): """Test if giving a proper change-type solves the user script config conflict""" conflicts = detect_conflicts(parent_config, changed_userconfig_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 4 assert len(conflicts.get_resolved()) == 1 @@ -562,7 +581,7 @@ def test_config_change(self, parent_config, changed_userconfig_config): def test_bad_config_change(self, capsys, parent_config, changed_userconfig_config): """Test if giving an invalid change-type prints error message and do nothing""" conflicts = detect_conflicts(parent_config, changed_userconfig_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) capsys.readouterr() branch_builder.set_script_config_change_type('bad-type') out, err = capsys.readouterr() @@ -571,11 +590,10 @@ def test_bad_config_change(self, capsys, parent_config, changed_userconfig_confi assert len(conflicts.get()) == 4 assert len(conflicts.get_resolved()) == 1 - @pytest.mark.skip(reason='Args defined with \'=\' are not supported currently.') def test_cli_change(self, parent_config, changed_cli_config): """Test if giving a proper change-type solves the command line conflict""" conflicts = detect_conflicts(parent_config, changed_cli_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 1 @@ -585,15 +603,14 @@ def test_cli_change(self, parent_config, changed_cli_config): assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 - conflict = conflicts.get_resolved()[1] + conflict = conflicts.get_resolved()[0] assert conflict.is_resolved assert isinstance(conflict, CommandLineConflict) - @pytest.mark.skip(reason='Args defined with \'=\' are not supported currently.') def test_bad_cli_change(self, capsys, parent_config, changed_cli_config): """Test if giving an invalid change-type prints error message and do nothing""" conflicts = detect_conflicts(parent_config, changed_cli_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) capsys.readouterr() branch_builder.set_cli_change_type('bad-type') out, err = capsys.readouterr() @@ -604,7 +621,7 @@ def 
test_bad_cli_change(self, capsys, parent_config, changed_cli_config): def test_solve_all_automatically(self, conflicts): """Test if all conflicts all automatically resolve by the ExperimentBranchBuilder.""" - ExperimentBranchBuilder(conflicts, {}) + ExperimentBranchBuilder(conflicts) assert len(conflicts.get_resolved()) == 8 @@ -616,7 +633,7 @@ def test_add_new(self, parent_config, new_config): """Test if new dimension conflict is automatically resolved""" new_config['metadata']['user_args'][-1] = '-w_d~+normal(0,1)' conflicts = detect_conflicts(parent_config, new_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -629,9 +646,9 @@ def test_add_new(self, parent_config, new_config): def test_add_new_default(self, parent_config, new_config): """Test if new dimension conflict is automatically resolved""" new_config['metadata']['user_args'][-1] = '-w_d~+normal(0,1,default_value=0)' - backward.populate_priors(new_config['metadata']) + backward.populate_space(new_config) conflicts = detect_conflicts(parent_config, new_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -645,17 +662,17 @@ def test_add_new_default(self, parent_config, new_config): def test_add_bad_default(self, parent_config, new_config): """Test if new dimension conflict raises an error if marked with invalid default value""" new_config['metadata']['user_args'][-1] = '-w_d~+normal(0,1,default_value=\'a\')' - backward.populate_priors(new_config['metadata']) + backward.populate_space(new_config) with pytest.raises(TypeError) as exc: detect_conflicts(parent_config, new_config) assert "Parameter \'/w_d\': Incorrect arguments." 
in str(exc.value) def test_add_changed(self, parent_config, changed_config): """Test if changed dimension conflict is automatically resolved""" - changed_config['metadata']['user_args'][2] = ( - changed_config['metadata']['user_args'][2].replace("~", "~+")) + changed_config['metadata']['user_args'][3] = ( + changed_config['metadata']['user_args'][3].replace("~", "~+")) conflicts = detect_conflicts(parent_config, changed_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -667,10 +684,10 @@ def test_add_changed(self, parent_config, changed_config): def test_remove_missing(self, parent_config, child_config): """Test if missing dimension conflict is automatically resolved""" - child_config['metadata']['user_args'][1] = '-x~-' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~-' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -682,10 +699,10 @@ def test_remove_missing(self, parent_config, child_config): def test_remove_missing_default(self, parent_config, child_config): """Test if missing dimension conflict is automatically resolved""" - child_config['metadata']['user_args'][1] = '-x~-0.5' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~-0.5' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -698,10 +715,10 @@ def test_remove_missing_default(self, parent_config, child_config): def test_remove_missing_bad_default(self, parent_config, child_config): """Test if missing dimension conflict raises an error if marked with invalid default""" - child_config['metadata']['user_args'][1] = '-x~--100' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~--100' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 1 @@ -715,10 +732,10 @@ def test_rename_missing(self, parent_config, child_config): """Test if renaming is automatically applied with both conflicts resolved""" child_config['metadata']['user_args'].append('-w_a~uniform(0,1)') child_config['metadata']['user_args'].append('-w_b~normal(0,1)') - child_config['metadata']['user_args'][1] = '-x~>w_a' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~>w_a' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 4 @@ -739,11 +756,11 @@ def test_rename_invalid(self, parent_config, child_config): """Test if renaming to invalid dimension raises an error""" 
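# -- Editor's aside (hedged sketch, not Oríon's actual parser): the fixtures
# and tests in this file drive conflict resolution with command-line markers:
# '~+' marks a dimension to add, '~-' marks one to remove (optionally with a
# default value, e.g. '-x~-0.5'), and '~>' marks a rename. A simplified
# illustration of how such markers can be decoded:
def parse_marker(arg):
    """Split an argument like '-x~>w_d' into (action, name, payload)."""
    name, _, spec = arg.partition('~')
    name = name.lstrip('-')
    if spec.startswith('>'):
        return ('rename', name, spec[1:])
    if spec.startswith('+'):
        return ('add', name, spec[1:])
    if spec.startswith('-'):
        return ('remove', name, spec[1:] or None)
    return ('prior', name, spec)

assert parse_marker('-x~>w_d') == ('rename', 'x', 'w_d')
assert parse_marker('-w_d~+normal(0,1)') == ('add', 'w_d', 'normal(0,1)')
assert parse_marker('-x~-0.5') == ('remove', 'x', '0.5')
assert parse_marker('-z~-') == ('remove', 'z', None)
# -- end of aside; the test body resumes below.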
child_config['metadata']['user_args'].append('-w_a~uniform(0,1)') child_config['metadata']['user_args'].append('-w_b~uniform(0,1)') - child_config['metadata']['user_args'][1] = '-x~>w_c' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~>w_c' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) with pytest.raises(ValueError) as exc: - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert "Dimension name 'w_c' not found in conflicts" in str(exc.value) def test_rename_missing_changed(self, parent_config, child_config): @@ -752,10 +769,10 @@ def test_rename_missing_changed(self, parent_config, child_config): """ child_config['metadata']['user_args'].append('-w_a~uniform(0,1)') child_config['metadata']['user_args'].append('-w_b~normal(0,1)') - child_config['metadata']['user_args'][1] = '-x~>w_b' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~>w_b' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 5 @@ -779,10 +796,10 @@ def test_rename_missing_changed_marked(self, parent_config, child_config): """ child_config['metadata']['user_args'].append('-w_a~uniform(0,1)') child_config['metadata']['user_args'].append('-w_b~+normal(0,1)') - child_config['metadata']['user_args'][1] = '-x~>w_b' - backward.populate_priors(child_config['metadata']) + child_config['metadata']['user_args'][2] = '-x~>w_b' + backward.populate_space(child_config) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 5 @@ -800,12 +817,35 @@ def test_rename_missing_changed_marked(self, parent_config, child_config): assert resolved_conflicts[1].resolution.conflict.dimension.name == '/x' assert resolved_conflicts[1].resolution.new_dimension_conflict.dimension.name == '/w_b' - def test_name_experiment(self, parent_config, child_config, create_db_instance): - """Test if experiment name conflict is automatically resolved""" - new_name = 'test2' + def test_name_experiment_version_update(self, parent_config, child_config, create_db_instance): + """Test if experiment name conflict is automatically resolved with version update""" + old_name = 'test' + new_version = 2 create_db_instance.write('experiments', parent_config) + child_config['version'] = 1 conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'branch': new_name}) + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 1 + assert len(conflicts.get_resolved()) == 1 + + conflict = conflicts.get()[0] + + assert conflict.resolution.new_name == old_name + assert conflict.resolution.new_version == 2 + assert conflict.new_config['name'] == old_name + assert conflict.new_config['version'] == new_version + assert conflict.is_resolved + + def test_name_experiment_name_change(self, parent_config, child_config, create_db_instance): + """Test if experiment name conflict is automatically resolved when new name provided""" + new_name = 'test2' + create_db_instance.write('experiments', parent_config) + create_db_instance.write('experiments', 
child_config) + child_config2 = copy.deepcopy(child_config) + child_config2['version'] = 1 + conflicts = detect_conflicts(parent_config, child_config2) + ExperimentBranchBuilder(conflicts, branch_to=new_name) assert len(conflicts.get()) == 1 assert len(conflicts.get_resolved()) == 1 @@ -813,7 +853,9 @@ def test_name_experiment(self, parent_config, child_config, create_db_instance): conflict = conflicts.get()[0] assert conflict.resolution.new_name == new_name + assert conflict.resolution.new_version == 1 assert conflict.new_config['name'] == new_name + assert conflict.new_config['version'] == 1 assert conflict.is_resolved def test_bad_name_experiment(self, parent_config, child_config, monkeypatch): @@ -825,7 +867,7 @@ def _is_unique(self, *args, **kwargs): _is_unique) conflicts = detect_conflicts(parent_config, child_config) - ExperimentBranchBuilder(conflicts, {'branch': 'test2'}) + ExperimentBranchBuilder(conflicts, branch_to='test2') assert len(conflicts.get()) == 1 assert len(conflicts.get_resolved()) == 0 @@ -835,7 +877,7 @@ def test_code_change(self, parent_config, changed_code_config): change_type = evc.adapters.CodeChange.types[0] changed_code_config['code_change_type'] = change_type conflicts = detect_conflicts(parent_config, changed_code_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -850,7 +892,7 @@ def test_algo_change(self, parent_config, changed_algo_config): """Test if algorithm conflict is resolved automatically""" changed_algo_config['algorithm_change'] = True conflicts = detect_conflicts(parent_config, changed_algo_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -865,7 +907,7 @@ def test_config_change(self, parent_config, changed_userconfig_config): change_type = evc.adapters.ScriptConfigChange.types[0] changed_userconfig_config['config_change_type'] = change_type conflicts = detect_conflicts(parent_config, changed_userconfig_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 4 assert len(conflicts.get_resolved()) == 2 @@ -876,13 +918,12 @@ def test_config_change(self, parent_config, changed_userconfig_config): assert isinstance(conflict.resolution, conflict.ScriptConfigResolution) assert conflict.resolution.type == change_type - @pytest.mark.skip(reason='Args defined with \'=\' are not supported currently.') def test_cli_change(self, parent_config, changed_cli_config): """Test if command line conflict is resolved automatically""" change_type = evc.adapters.CommandLineChange.types[0] changed_cli_config['cli_change_type'] = change_type conflicts = detect_conflicts(parent_config, changed_cli_config) - ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + ExperimentBranchBuilder(conflicts, manual_resolution=True) assert len(conflicts.get()) == 2 assert len(conflicts.get_resolved()) == 2 @@ -902,7 +943,7 @@ def test_adapter_add_new(self, parent_config, cl_config): cl_config['metadata']['user_args'] = ['-w_d~+normal(0,1)'] conflicts = detect_conflicts(parent_config, cl_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, 
manual_resolution=True) adapters = branch_builder.create_adapters().adapters @@ -915,7 +956,7 @@ def test_adapter_add_changed(self, parent_config, cl_config): cl_config['metadata']['user_args'] = ['-y~+uniform(0,1)'] conflicts = detect_conflicts(parent_config, cl_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) adapters = branch_builder.create_adapters().adapters @@ -928,7 +969,7 @@ def test_adapter_remove_missing(self, parent_config, cl_config): cl_config['metadata']['user_args'] = ['-z~-'] conflicts = detect_conflicts(parent_config, cl_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) adapters = branch_builder.create_adapters().adapters @@ -939,10 +980,10 @@ def test_adapter_remove_missing(self, parent_config, cl_config): def test_adapter_rename_missing(self, parent_config, cl_config): """Test if a DimensionRenaming is created when solving a new conflict""" cl_config['metadata']['user_args'] = ['-x~>w_d', '-w_d~+uniform(0,1)'] - backward.populate_priors(cl_config['metadata']) + backward.populate_space(cl_config) conflicts = detect_conflicts(parent_config, cl_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) adapters = branch_builder.create_adapters().adapters @@ -955,7 +996,7 @@ def test_adapter_rename_different_prior(self, parent_config, cl_config): cl_config['metadata']['user_args'] = ['-x~>w_d', '-w_d~+normal(0,1)'] conflicts = detect_conflicts(parent_config, cl_config) - branch_builder = ExperimentBranchBuilder(conflicts, {'manual_resolution': True}) + branch_builder = ExperimentBranchBuilder(conflicts, manual_resolution=True) adapters = branch_builder.create_adapters().adapters @@ -963,3 +1004,80 @@ def test_adapter_rename_different_prior(self, parent_config, cl_config): assert len(adapters) == 2 assert isinstance(adapters[0], evc.adapters.DimensionRenaming) assert isinstance(adapters[1], evc.adapters.DimensionPriorChange) + + +class TestResolutionsConfig(object): + """Test auto-resolution with specific types from orion.core.config.evc""" + + def test_cli_change(self, parent_config, changed_cli_config): + """Test if giving a proper change-type solves the command line conflict""" + conflicts = detect_conflicts(parent_config, changed_cli_config) + orion.core.config.evc.cli_change_type = 'noeffect' + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 2 + assert len(conflicts.get_resolved()) == 2 + + conflict = conflicts.get_resolved()[0] + assert conflict.is_resolved + assert isinstance(conflict, CommandLineConflict) + assert conflict.resolution.type == 'noeffect' + orion.core.config.evc.cli_change_type = 'break' + + def test_bad_cli_change(self, capsys, parent_config, changed_cli_config): + """Test if giving an invalid change-type fails the resolution""" + conflicts = detect_conflicts(parent_config, changed_cli_config) + orion.core.config.evc.cli_change_type = 'bad-type' + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 2 + assert len(conflicts.get_resolved()) == 1 + orion.core.config.evc.cli_change_type = 'break' + + def test_code_change(self, parent_config, changed_code_config): + """Test if giving a proper change-type solves the code conflict""" + conflicts =
detect_conflicts(parent_config, changed_code_config) + orion.core.config.evc.code_change_type = 'noeffect' + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 2 + assert len(conflicts.get_resolved()) == 2 + + conflict = conflicts.get_resolved()[0] + assert conflict.is_resolved + assert isinstance(conflict, CodeConflict) + assert conflict.resolution.type == 'noeffect' + orion.core.config.evc.code_change_type = 'break' + + def test_bad_code_change(self, capsys, parent_config, changed_code_config): + """Test if giving an invalid change-type fails the resolution""" + conflicts = detect_conflicts(parent_config, changed_code_config) + orion.core.config.evc.code_change_type = 'bad-type' + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 2 + assert len(conflicts.get_resolved()) == 1 + orion.core.config.evc.code_change_type = 'break' + + def test_config_change(self, parent_config, changed_userconfig_config): + """Test if giving a proper change-type solves the user script config conflict""" + conflicts = detect_conflicts(parent_config, changed_userconfig_config) + orion.core.config.evc.config_change_type = 'noeffect' + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 4 + assert len(conflicts.get_resolved()) == 4 + + conflict = conflicts.get_resolved()[3] + assert conflict.is_resolved + assert isinstance(conflict, ScriptConfigConflict) + assert conflict.resolution.type == 'noeffect' + + def test_bad_config_change(self, capsys, parent_config, changed_userconfig_config): + """Test if giving an invalid change-type fails the resolution""" + conflicts = detect_conflicts(parent_config, changed_userconfig_config) + orion.core.config.evc.config_change_type = 'bad-type' + ExperimentBranchBuilder(conflicts) + + assert len(conflicts.get()) == 4 + assert len(conflicts.get_resolved()) == 3 diff --git a/tests/unittests/core/test_ephemeraldb.py b/tests/unittests/core/test_ephemeraldb.py index 7d89cc10d..18dd7a4cb 100644 --- a/tests/unittests/core/test_ephemeraldb.py +++ b/tests/unittests/core/test_ephemeraldb.py @@ -8,6 +8,7 @@ from orion.core.io.database import Database, DatabaseError, DuplicateKeyError from orion.core.io.database.ephemeraldb import EphemeralCollection, EphemeralDB, EphemeralDocument +import orion.core.utils.backward as backward @pytest.fixture() @@ -113,6 +114,7 @@ def test_read_experiment(self, exp_config, orion_db): def test_read_with_id(self, exp_config, orion_db): """Query using ``_id`` key.""" loaded_config = orion_db.read('experiments', {'_id': exp_config[0][2]['_id']}) + backward.populate_space(loaded_config[0]) assert loaded_config == [exp_config[0][2]] def test_read_default(self, exp_config, orion_db): @@ -244,6 +246,7 @@ def test_read_and_write_one(self, database, orion_db, exp_config): {'name': 'supernaedo4'}, {'pool_size': 'lalala'}) exp_config[0][3]['pool_size'] = 'lalala' + backward.populate_space(loaded_config) assert loaded_config == exp_config[0][3] def test_read_and_write_many(self, database, orion_db, exp_config): @@ -259,6 +262,7 @@ def test_read_and_write_many(self, database, orion_db, exp_config): {'pool_size': 'lalala'}) exp_config[0][1]['pool_size'] = 'lalala' + backward.populate_space(loaded_config) assert loaded_config == exp_config[0][1] # Make sure it only changed the first document found @@ -289,7 +293,9 @@ def test_remove_many_default(self, exp_config, database, orion_db): assert orion_db.remove('experiments', filt) == count_filt assert database['experiments'].count() ==
count_before - count_filt assert database['experiments'].count() == 1 - assert list(database['experiments'].find()) == [exp_config[0][0]] + loaded_config = list(database['experiments'].find()) + backward.populate_space(loaded_config[0]) + assert loaded_config == [exp_config[0][0]] def test_remove_with_id(self, exp_config, database, orion_db): """Query using ``_id`` key.""" @@ -299,7 +305,30 @@ def test_remove_with_id(self, exp_config, database, orion_db): # call interface assert orion_db.remove('experiments', filt) == 1 assert database['experiments'].count() == count_before - 1 - assert database['experiments'].find() == exp_config[0][1:] + loaded_configs = database['experiments'].find() + for loaded_config in loaded_configs: + backward.populate_space(loaded_config) + assert loaded_configs == exp_config[0][1:] + + def test_remove_update_indexes(self, exp_config, database, orion_db): + """Verify that indexes are properly updated after deletion.""" + with pytest.raises(DuplicateKeyError): + orion_db.write('experiments', {'_id': exp_config[0][0]['_id']}) + with pytest.raises(DuplicateKeyError): + orion_db.write('experiments', {'_id': exp_config[0][1]['_id']}) + + filt = {'_id': exp_config[0][0]['_id']} + + count_before = database['experiments'].count() + # call interface + assert orion_db.remove('experiments', filt) == 1 + assert database['experiments'].count() == count_before - 1 + # Should not fail now, otherwise it means the indexes were not updated properly during + # remove() + orion_db.write('experiments', filt) + # And this should still fail + with pytest.raises(DuplicateKeyError): + orion_db.write('experiments', {'_id': exp_config[0][1]['_id']}) @pytest.mark.usefixtures("clean_db") diff --git a/tests/unittests/core/test_insert.py b/tests/unittests/core/test_insert.py index f9de19ed0..d6968cfae 100644 --- a/tests/unittests/core/test_insert.py +++ b/tests/unittests/core/test_insert.py @@ -12,24 +12,24 @@ @pytest.fixture() def real_space(): """Fixture for real space""" - return SpaceBuilder().build_from(["-x~uniform(-10,20)"]) + return SpaceBuilder().build({"x": "uniform(-10,20)"}) @pytest.fixture() def integer_space(): """Fixture for integer space""" - return SpaceBuilder().build_from(["-x~uniform(-10,20,discrete=True)"]) + return SpaceBuilder().build({"x": "uniform(-10,20,discrete=True)"}) @pytest.fixture() def categorical_space(): """Fixture for categorical space""" - return SpaceBuilder().build_from(["-x~choices([10.1,11,'12','string'])"]) + return SpaceBuilder().build({"x": "choices([10.1,11,'12','string'])"}) def test_validate_input_value_real_real(real_space): """Test if real value passed to real space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("10.0", real_space, namespace) assert is_valid assert isinstance(casted_value, numbers.Number) @@ -37,7 +37,7 @@ def test_validate_input_value_real_integer(real_space): """Test if integer value passed to real space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("10", real_space, namespace) assert is_valid assert isinstance(casted_value, numbers.Number) @@ -45,14 +45,14 @@ def test_validate_input_value_real_string(real_space): """Test if string value passed to real space is rejected properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("string", real_space, namespace)
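# -- Editor's aside (hedged sketch, simplified; not the actual
# `_validate_input_value` under test): these tests exercise a validate-and-cast
# helper that coerces a raw string to the dimension's type and then checks
# that the result lies within the dimension's bounds. The core idea:
def validate_value(raw, caster, low, high):
    """Cast `raw` with `caster`; report whether it falls within [low, high]."""
    try:
        value = caster(raw)
    except ValueError:
        return False, None
    return low <= value <= high, value

assert validate_value("10.0", float, -10, 20) == (True, 10.0)     # in bounds
assert validate_value("100.0", float, -10, 20) == (False, 100.0)  # out of bounds
assert validate_value("string", float, -10, 20) == (False, None)  # not castable
# -- end of aside; the test body resumes below.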
assert not is_valid def test_validate_input_value_real_out_of_bound(real_space): """Test if out of bound values passed to real space are rejected properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("100.0", real_space, namespace) assert not is_valid @@ -63,7 +63,7 @@ def test_validate_input_value_real_out_of_bound(real_space): def test_validate_input_value_integer_real(integer_space): """Test if real value passed to integer space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("10.0", integer_space, namespace) assert is_valid @@ -72,7 +72,7 @@ def test_validate_input_value_integer_real(integer_space): def test_validate_input_value_integer_integer(integer_space): """Test if integer value passed to integer space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("10", integer_space, namespace) assert is_valid @@ -81,7 +81,7 @@ def test_validate_input_value_integer_integer(integer_space): def test_validate_input_value_integer_string(integer_space): """Test if string value passed to integer space is rejected properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("string", integer_space, namespace) assert not is_valid @@ -89,7 +89,7 @@ def test_validate_input_value_integer_string(integer_space): def test_validate_input_value_integer_out_of_bound(integer_space): """Test if out of bound values passed to integer space are rejected properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("100.0", integer_space, namespace) assert not is_valid @@ -100,7 +100,7 @@ def test_validate_input_value_integer_out_of_bound(integer_space): def test_validate_input_value_categorical_real_hit(categorical_space): """Test if real value passed to categorical space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("10.1", categorical_space, namespace) assert is_valid @@ -109,7 +109,7 @@ def test_validate_input_value_categorical_real_hit(categorical_space): def test_validate_input_value_categorical_real_nohit(categorical_space): """Test if bad real value passed to categorical space is rejected properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("10", categorical_space, namespace) assert not is_valid @@ -123,7 +123,7 @@ def test_validate_input_value_categorical_real_nohit(categorical_space): def test_validate_input_value_categorical_integer_hit(categorical_space): """Test if integer value passed to categorical space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("11", categorical_space, namespace) assert is_valid @@ -136,16 +136,15 @@ def test_validate_input_value_categorical_integer_hit(categorical_space): def test_validate_input_value_categorical_integer_nohit(categorical_space): """Test if bad integer value passed to categorical space is rejected properly""" - namespace = '/x' + namespace = 'x' - # pytest.set_trace() is_valid, casted_value = _validate_input_value("15", categorical_space, namespace) assert not is_valid def test_validate_input_value_categorical_string_number(categorical_space): """Test if string number value passed to categorical space is validated properly""" - namespace = '/x' + namespace = 'x' # Make sure integer 12 does not pass is_valid, casted_value = _validate_input_value("12", 
categorical_space, namespace) @@ -159,7 +158,7 @@ def test_validate_input_value_categorical_string_value(categorical_space): """Test if literal string value passed to categorical space is validated properly""" - namespace = '/x' + namespace = 'x' is_valid, casted_value = _validate_input_value("random", categorical_space, namespace) assert not is_valid diff --git a/tests/unittests/core/test_pickleddb.py b/tests/unittests/core/test_pickleddb.py index 0564c51b1..1b79802af 100644 --- a/tests/unittests/core/test_pickleddb.py +++ b/tests/unittests/core/test_pickleddb.py @@ -6,11 +6,13 @@ from multiprocessing import Pool import os +from filelock import FileLock, Timeout import pytest -from orion.core.io.database import Database, DuplicateKeyError +from orion.core.io.database import Database, DatabaseTimeout, DuplicateKeyError from orion.core.io.database.ephemeraldb import EphemeralCollection from orion.core.io.database.pickleddb import find_unpickable_doc, find_unpickable_field, PickledDB +import orion.core.utils.backward as backward @pytest.fixture() @@ -94,6 +96,7 @@ def test_read_experiment(self, exp_config, orion_db): def test_read_with_id(self, exp_config, orion_db): """Query using ``_id`` key.""" loaded_config = orion_db.read('experiments', {'_id': exp_config[0][2]['_id']}) + backward.populate_space(loaded_config[0]) assert loaded_config == [exp_config[0][2]] def test_read_default(self, exp_config, orion_db): @@ -220,6 +223,7 @@ def test_read_and_write_one(self, orion_db, exp_config): {'name': 'supernaedo4'}, {'pool_size': 'lalala'}) exp_config[0][3]['pool_size'] = 'lalala' + backward.populate_space(loaded_config) assert loaded_config == exp_config[0][3] def test_read_and_write_many(self, orion_db, exp_config): @@ -234,6 +238,7 @@ def test_read_and_write_many(self, orion_db, exp_config): {'pool_size': 'lalala'}) exp_config[0][1]['pool_size'] = 'lalala' + backward.populate_space(loaded_config) assert loaded_config == exp_config[0][1] # Make sure it only changed the first document found @@ -278,7 +283,9 @@ def test_remove_many_default(self, exp_config, orion_db): database = orion_db._get_database()._db assert database['experiments'].count() == count_before - count_filt assert database['experiments'].count() == 1 - assert list(database['experiments'].find()) == [exp_config[0][0]] + loaded_config = list(database['experiments'].find()) + backward.populate_space(loaded_config[0]) + assert loaded_config == [exp_config[0][0]] def test_remove_with_id(self, exp_config, orion_db): """Query using ``_id`` key.""" @@ -290,7 +297,32 @@ def test_remove_with_id(self, exp_config, orion_db): assert orion_db.remove('experiments', filt) == 1 database = orion_db._get_database()._db assert database['experiments'].count() == count_before - 1 - assert database['experiments'].find() == exp_config[0][1:] + loaded_configs = database['experiments'].find() + for loaded_config in loaded_configs: + backward.populate_space(loaded_config) + assert loaded_configs == exp_config[0][1:] + + def test_remove_update_indexes(self, exp_config, orion_db): + """Verify that indexes are properly updated after deletion.""" + with pytest.raises(DuplicateKeyError): + orion_db.write('experiments', {'_id': exp_config[0][0]['_id']}) + with pytest.raises(DuplicateKeyError): + orion_db.write('experiments', {'_id': exp_config[0][1]['_id']}) + + filt = {'_id': exp_config[0][0]['_id']} + + database = orion_db._get_database()._db + count_before =
database['experiments'].count() + # call interface + assert orion_db.remove('experiments', filt) == 1 + database = orion_db._get_database()._db + assert database['experiments'].count() == count_before - 1 + # Should not fail now, otherwise it means the indexes were not updated properly during + # remove() + orion_db.write('experiments', filt) + # And this should still fail + with pytest.raises(DuplicateKeyError): + orion_db.write('experiments', {'_id': exp_config[0][1]['_id']}) @pytest.mark.usefixtures("clean_db") @@ -412,3 +444,19 @@ def make_pickable(uid): key, value = find_unpickable_field(doc) assert key == 'b_unpickable', 'should return the unpickable field' assert isinstance(value, UnpickableClass), 'should return the unpickable value' + + +def test_query_timeout(monkeypatch, orion_db): + """Verify that filelock.Timeout is caught and re-raised as DatabaseTimeout""" + orion_db.timeout = 0.1 + + def never_acquire(self, *arg, **kwargs): + """Do not try to acquire, raise timeout""" + raise Timeout(self) + + monkeypatch.setattr(FileLock, 'acquire', never_acquire) + + with pytest.raises(DatabaseTimeout) as exc: + orion_db.read('whatever', {'it should': 'fail'}) + + assert exc.match('Could not acquire lock for PickledDB after 0.1 seconds.') diff --git a/tests/unittests/core/test_strategy.py b/tests/unittests/core/test_strategy.py index 903940a89..763f51cfc 100644 --- a/tests/unittests/core/test_strategy.py +++ b/tests/unittests/core/test_strategy.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Collection of tests for :mod:`orion.core.worker.strategies`.""" +import logging + import pytest from orion.core.worker.strategy import ( @@ -23,6 +25,40 @@ def incomplete_trial(): return Trial(params=[{'name': 'a', 'type': 'integer', 'value': 6}]) +@pytest.fixture +def corrupted_trial(): + """Return a corrupted trial with results but status reserved""" + return Trial(params=[{'name': 'a', 'type': 'integer', 'value': 6}], + results=[{'name': 'objective', 'type': 'objective', 'value': 1}], + status='reserved') + + +strategies = [ + 'MaxParallelStrategy', 'MeanParallelStrategy', 'NoParallelStrategy', 'StubParallelStrategy'] + + +@pytest.mark.parametrize('strategy', strategies) +def test_handle_corrupted_trials(caplog, strategy, corrupted_trial): + """Verify that corrupted trials are handled properly""" + with caplog.at_level(logging.WARNING, logger="orion.core.worker.strategy"): + lie = Strategy(strategy).lie(corrupted_trial) + + match = "Trial `{}` has an objective but status is not completed".format(corrupted_trial.id) + assert match in caplog.text + + assert lie is not None + assert lie.value == corrupted_trial.objective.value + + +@pytest.mark.parametrize('strategy', strategies) +def test_handle_uncorrupted_trials(caplog, strategy, incomplete_trial): + """Verify that no warning is logged if trial is valid""" + with caplog.at_level(logging.WARNING, logger="orion.core.worker.strategy"): + Strategy(strategy).lie(incomplete_trial) + + assert "Trial `{}` has an objective but status is not completed" not in caplog.text + + class TestStrategyFactory: """Test creating a parallel strategy with the Strategy class""" diff --git a/tests/unittests/core/test_transformer.py b/tests/unittests/core/test_transformer.py index dc147a3fc..0bb2b6955 100644 --- a/tests/unittests/core/test_transformer.py +++ b/tests/unittests/core/test_transformer.py @@ -10,7 +10,7 @@ from orion.algo.space import (Categorical, Dimension, Integer, Real, Space,) from orion.core.worker.transformer import
(build_required_space, Compose, Enumerate, Identity, - OneHotEncode, Quantize, Reverse, + OneHotEncode, Precision, Quantize, Reverse, TransformedDimension, TransformedSpace,) @@ -204,6 +204,49 @@ def test_repr_format(self): assert t.repr_format('asfa') == 'OneHotEncode(Enumerate(asfa))' +class TestPrecision(object): + """Test subclasses of `Precision` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Precision() + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. + """ + t = Precision() + assert t.domain_type == 'real' + assert t.target_type == 'real' + + def test_transform(self): + """Check if it transforms properly.""" + t = Precision(precision=4) + assert t.transform(8.654321098) == 8.654 + assert t.transform(0.000123456789) == 0.0001235 + assert numpy.all(t.transform([8.654321098, 0.000123456789]) == + numpy.array([8.654, 0.0001235], dtype=float)) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Precision() + assert t.reverse(9.) == 9. + assert t.reverse(5.) == 5. + assert numpy.all(t.reverse([9., 5.]) == numpy.array([9., 5.], dtype=float)) + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Precision() + assert t.infer_target_shape((5,)) == (5,) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Precision() + assert t.repr_format('asfa') == 'Precision(4, asfa)' + + class TestQuantize(object): """Test subclasses of `Quantize` transformation.""" @@ -488,7 +531,7 @@ def test_interval(self, tdim): def test_interval_from_categorical(self, tdim2): """Check how we should treat interval when original dimension is categorical.""" - assert tdim2.interval() == (-0.1, 1.1) + assert tdim2.interval() == ('asdfa', '2', '3', '4') def test_contains(self, tdim): """Check method `__contains__`.""" @@ -577,6 +620,11 @@ def test_type_property(self, tdim, tdim2): assert tdim.type == 'integer' assert tdim2.type == 'real' + def test_prior_name_property(self, tdim, tdim2): + """Check property `prior_name`.""" + assert tdim.prior_name == 'norm' + assert tdim2.prior_name == 'choices' + def test_shape_property(self, tdim, tdim2): """Check property `shape`.""" assert tdim.original_dimension.shape == (3, 2) @@ -662,14 +710,20 @@ def test_no_requirement(self, space_each_type): assert tspace[0].type == 'real' assert tspace[1].type == 'categorical' assert tspace[2].type == 'integer' - assert str(tspace) == str(space_each_type) + assert (str(tspace) == + "Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)),\n" # noqa + " Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=None),\n" # noqa + " Integer(name=yolo3, prior={randint: (3, 10), {}}, shape=(), default value=None)])") # noqa tspace = build_required_space([], space_each_type) assert len(tspace) == 3 assert tspace[0].type == 'real' assert tspace[1].type == 'categorical' assert tspace[2].type == 'integer' - assert str(tspace) == str(space_each_type) + assert (str(tspace) == + "Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)),\n" # noqa + " Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=None),\n" # noqa + " Integer(name=yolo3, prior={randint: (3, 10), {}}, 
shape=(), default value=None)])") # noqa def test_integer_requirement(self, space_each_type): """Check what is built using 'integer' requirement.""" @@ -691,22 +745,41 @@ def test_real_requirement(self, space_each_type): assert tspace[1].type == 'real' assert tspace[2].type == 'real' assert(str(tspace) == - "Space([Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None),\n" # noqa + "Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)),\n" # noqa " OneHotEncode(Enumerate(Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=None))),\n" # noqa " ReverseQuantize(Integer(name=yolo3, prior={randint: (3, 10), {}}, shape=(), default value=None))])") # noqa + def test_capacity(self, space_each_type): + """Check transformer space capacity""" + tspace = build_required_space('real', space_each_type) + assert tspace.cardinality == numpy.inf + + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ('asdfa', 2, 3, 4) + dim = Categorical('yolo', OrderedDict(zip(categories, probs)), shape=2) + space.register(dim) + dim = Integer('yolo2', 'uniform', -3, 6) + space.register(dim) + tspace = build_required_space('integer', space) + assert tspace.cardinality == (4 * 2) * 6 + + dim = Integer('yolo3', 'uniform', -3, 6, shape=(2, 1)) + space.register(dim) + tspace = build_required_space('integer', space) + assert tspace.cardinality == (4 * 2) * 6 * 6 * (2 * 1) + def test_quantization_does_not_violate_bounds(): """Regress on bug that converts valid float in tdim to non valid excl. upper bound.""" dim = Integer('yo', 'uniform', 3, 7) transformers = [Reverse(Quantize())] tdim = TransformedDimension(Compose(transformers, dim.type), dim) - assert 10 not in dim - assert 9 in dim - assert 10 not in dim - assert 9 in dim - # but be careful, because upper bound is exclusive - assert 9.6 in tdim + assert 11 not in dim + assert 10 in dim + # but be careful, because upper bound is inclusive + assert 11.5 not in tdim + assert 10.6 in tdim assert tdim.reverse(9.6) in dim # solution is to quantize with 'floor' instead of 'round' assert tdim.reverse(9.6) == 9 diff --git a/tests/unittests/core/test_trial.py b/tests/unittests/core/test_trial.py index 00a646f2c..4cc4ddf71 100644 --- a/tests/unittests/core/test_trial.py +++ b/tests/unittests/core/test_trial.py @@ -21,7 +21,7 @@ def test_init_empty(self): assert t.start_time is None assert t.end_time is None assert t.results == [] - assert t.params == [] + assert t.params == {} assert t.working_dir is None def test_init_full(self, exp_config): @@ -37,7 +37,7 @@ def test_init_full(self, exp_config): assert t.results[0].name == exp_config[1][1]['results'][0]['name'] assert t.results[0].type == exp_config[1][1]['results'][0]['type'] assert t.results[0].value == exp_config[1][1]['results'][0]['value'] - assert list(map(lambda x: x.to_dict(), t.params)) == exp_config[1][1]['params'] + assert list(map(lambda x: x.to_dict(), t._params)) == exp_config[1][1]['params'] assert t.working_dir is None def test_higher_shapes_not_ndarray(self): @@ -47,7 +47,7 @@ def test_higher_shapes_not_ndarray(self): params = [dict(name='/x', type='real', value=value)] trial = Trial(params=params) - assert trial.params[0].value == expected + assert trial._params[0].value == expected def test_bad_access(self): """Other than `Trial.__slots__` are not allowed.""" @@ -117,8 +117,8 @@ def test_value_equal(self, exp_config): """Compare Param objects using __eq__""" trials = Trial.build(exp_config[1]) - 
assert trials[0].params[0] == Trial.Param(**exp_config[1][0]['params'][0]) - assert trials[0].params[1] != Trial.Param(**exp_config[1][0]['params'][0]) + assert trials[0]._params[0] == Trial.Param(**exp_config[1][0]['params'][0]) + assert trials[0]._params[1] != Trial.Param(**exp_config[1][0]['params'][0]) def test_str_trial(self, exp_config): """Test representation of `Trial`.""" @@ -129,8 +129,8 @@ def test_str_trial(self, exp_config): def test_str_value(self, exp_config): """Test representation of `Trial.Value`.""" t = Trial(**exp_config[1][1]) - assert str(t.params[1]) == "Param(name='/encoding_layer', "\ - "type='categorical', value='gru')" + assert (str(t._params[1]) == + "Param(name='/encoding_layer', type='categorical', value='gru')") def test_invalid_result(self, exp_config): """Test that invalid objectives cannot be set""" @@ -211,11 +211,13 @@ def test_gradient_property(self, exp_config): def test_params_repr_property(self, exp_config): """Check property `Trial.params_repr`.""" t = Trial(**exp_config[1][1]) - assert t.params_repr() == "/decoding_layer:lstm_with_attention,/encoding_layer:gru" - assert t.params_repr(sep='\n') == "/decoding_layer:lstm_with_attention\n/encoding_layer:gru" + assert Trial.format_params(t._params) == \ + "/decoding_layer:lstm_with_attention,/encoding_layer:gru" + assert Trial.format_params(t._params, sep='\n') == \ + "/decoding_layer:lstm_with_attention\n/encoding_layer:gru" t = Trial() - assert t.params_repr() == "" + assert Trial.format_params(t._params) == "" def test_hash_name_property(self, exp_config): """Check property `Trial.hash_name`.""" @@ -227,6 +229,38 @@ def test_hash_name_property(self, exp_config): t.hash_name assert 'params' in str(exc.value) + def test_param_name_property(self, exp_config): + """Check property `Trial.hash_params`.""" + exp_config[1][1]['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'}) + t1 = Trial(**exp_config[1][1]) + exp_config[1][1]['params'][-1]['value'] = '2' # changing the fidelity + t2 = Trial(**exp_config[1][1]) + assert t1.hash_name != t2.hash_name + assert t1.hash_params == t2.hash_params + + def test_hash_ignore_experiment(self, exp_config): + """Check property `Trial.compute_trial_hash(ignore_experiment=True)`.""" + exp_config[1][1]['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'}) + t1 = Trial(**exp_config[1][1]) + exp_config[1][1]['experiment'] = 'test' # changing the experiment name + t2 = Trial(**exp_config[1][1]) + assert t1.hash_name != t2.hash_name + assert t1.hash_params != t2.hash_params + assert (Trial.compute_trial_hash(t1, ignore_experiment=True) == + Trial.compute_trial_hash(t2, ignore_experiment=True)) + + def test_hash_ignore_lie(self, exp_config): + """Check property `Trial.compute_trial_hash(ignore_lie=True)`.""" + exp_config[1][1]['params'].append({'name': '/max_epoch', 'type': 'fidelity', 'value': '1'}) + t1 = Trial(**exp_config[1][1]) + # Add a lie + exp_config[1][1]['results'].append({'name': 'lie', 'type': 'lie', 'value': 1}) + t2 = Trial(**exp_config[1][1]) + assert t1.hash_name != t2.hash_name + assert t1.hash_params == t2.hash_params + assert (Trial.compute_trial_hash(t1, ignore_lie=True) == + Trial.compute_trial_hash(t2, ignore_lie=True)) + def test_full_name_property(self, exp_config): """Check property `Trial.full_name`.""" t = Trial(**exp_config[1][1]) diff --git a/tests/unittests/core/test_utils_format.py b/tests/unittests/core/test_utils_format.py index bb60dbbdb..0b5ab4080 100644 --- 
a/tests/unittests/core/test_utils_format.py +++ b/tests/unittests/core/test_utils_format.py @@ -4,7 +4,7 @@ import pytest -from orion.core.utils.format_trials import (trial_to_tuple, tuple_to_trial) +from orion.core.utils.format_trials import (dict_to_trial, trial_to_tuple, tuple_to_trial) from orion.core.worker.trial import Trial @@ -31,18 +31,53 @@ def trial(): return Trial(params=params) +@pytest.fixture() +def hierarchical_trial(): + """Stub trial with hierarchical params.""" + params = [ + dict( + name='yolo.first', + type='categorical', + value=('asdfa', 2) + ), + dict( + name='yolo.second', + type='integer', + value=0 + ), + dict( + name='yoloflat', + type='real', + value=3.5 + ) + ] + return Trial(params=params) + + +@pytest.fixture() +def dict_params(): + """Return dictionary of params to build a trial like `fixed_suggestion`""" + return {'yolo': ('asdfa', 2), 'yolo2': 0, 'yolo3': 3.5} + + +@pytest.fixture() +def hierarchical_dict_params(): + """Return dictionary of params to build a hierarchical trial""" + return {'yolo': {'first': ('asdfa', 2), 'second': 0}, 'yoloflat': 3.5} + + def test_trial_to_tuple(space, trial, fixed_suggestion): """Check if trial is correctly created from a sample/tuple.""" data = trial_to_tuple(trial, space) assert data == fixed_suggestion - trial.params[0].name = 'lalala' + trial._params[0].name = 'lalala' with pytest.raises(ValueError) as exc: trial_to_tuple(trial, space) assert "Trial params: [\'lalala\', \'yolo2\', \'yolo3\']" in str(exc.value) - trial.params.pop(0) + trial._params.pop(0) with pytest.raises(ValueError) as exc: trial_to_tuple(trial, space) @@ -59,9 +94,24 @@ def test_tuple_to_trial(space, trial, fixed_suggestion): assert t.start_time is None assert t.end_time is None assert t.results == [] - assert len(t.params) == len(trial.params) + assert len(t._params) == len(trial.params) + for i in range(len(t.params)): + assert t._params[i].to_dict() == trial._params[i].to_dict() + + +def test_dict_to_trial(space, trial, dict_params): + """Check if dict is converted successfully to trial.""" + t = dict_to_trial(dict_params, space) + assert t.experiment is None + assert t.status == 'new' + assert t.worker is None + assert t.submit_time is None + assert t.start_time is None + assert t.end_time is None + assert t.results == [] + assert len(t._params) == len(trial._params) for i in range(len(t.params)): - assert t.params[i].to_dict() == trial.params[i].to_dict() + assert t._params[i].to_dict() == trial._params[i].to_dict() def test_tuple_to_trial_to_tuple(space, trial, fixed_suggestion): @@ -77,6 +127,29 @@ def test_tuple_to_trial_to_tuple(space, trial, fixed_suggestion): assert t.start_time is None assert t.end_time is None assert t.results == [] - assert len(t.params) == len(trial.params) + assert len(t._params) == len(trial._params) + for i in range(len(t._params)): + assert t._params[i].to_dict() == trial._params[i].to_dict() + + +def test_hierarchical_trial_to_tuple(hierarchical_space, hierarchical_trial, fixed_suggestion): + """Check if hierarchical trial is correctly created from a sample/tuple.""" + data = trial_to_tuple(hierarchical_trial, hierarchical_space) + assert data == fixed_suggestion + + +def test_tuple_to_hierarchical_trial(hierarchical_space, hierarchical_trial, fixed_suggestion): + """Check if sample is recovered successfully from hierarchical trial.""" + t = tuple_to_trial(fixed_suggestion, hierarchical_space) + assert len(t._params) == len(hierarchical_trial._params) + for i in range(len(t._params)): + assert
t._params[i].to_dict() == hierarchical_trial._params[i].to_dict() + + +def test_hierarchical_dict_to_trial(hierarchical_space, hierarchical_trial, + hierarchical_dict_params): + """Check if hierarchical dict is converted successfully to trial.""" + t = dict_to_trial(hierarchical_dict_params, hierarchical_space) + assert len(t._params) == len(hierarchical_trial._params) for i in range(len(t.params)): - assert t.params[i].to_dict() == trial.params[i].to_dict() + assert t._params[i].to_dict() == hierarchical_trial._params[i].to_dict() diff --git a/tests/unittests/core/worker/test_consumer.py b/tests/unittests/core/worker/test_consumer.py index 5a70b41b1..0e4abbf04 100644 --- a/tests/unittests/core/worker/test_consumer.py +++ b/tests/unittests/core/worker/test_consumer.py @@ -8,7 +8,7 @@ import pytest -from orion.core.io.experiment_builder import ExperimentBuilder +import orion.core.io.experiment_builder as experiment_builder import orion.core.utils.backward as backward from orion.core.utils.format_trials import tuple_to_trial import orion.core.worker.consumer as consumer @@ -24,7 +24,8 @@ def config(exp_config): config['metadata']['user_args'] = ['--x~uniform(-50, 50)'] config['name'] = 'exp' config['working_dir'] = "/tmp/orion" - backward.populate_priors(config['metadata']) + backward.populate_space(config) + config['space'] = config['metadata']['priors'] return config @@ -34,7 +35,7 @@ def test_trials_interrupted_keyboard_int(config, monkeypatch): def mock_Popen(*args, **kwargs): raise KeyboardInterrupt - exp = ExperimentBuilder().build_from(config) + exp = experiment_builder.build(**config) monkeypatch.setattr(consumer.subprocess, "Popen", mock_Popen) @@ -58,7 +59,7 @@ def test_trials_interrupted_sigterm(config, monkeypatch): def mock_popen(*args, **kwargs): os.kill(os.getpid(), signal.SIGTERM) - exp = ExperimentBuilder().build_from(config) + exp = experiment_builder.build(**config) monkeypatch.setattr(subprocess.Popen, "wait", mock_popen) @@ -79,11 +80,11 @@ def mock_popen(*args, **kwargs): @pytest.mark.usefixtures("create_db_instance") def test_pacemaker_termination(config, monkeypatch): """Check if pacemaker stops as soon as the trial completes.""" - exp = ExperimentBuilder().build_from(config) + exp = experiment_builder.build(**config) trial = tuple_to_trial((1.0,), exp.space) - exp.register_trial(trial) + exp.register_trial(trial, status='reserved') con = Consumer(exp) @@ -100,11 +101,11 @@ def test_pacemaker_termination(config, monkeypatch): @pytest.mark.usefixtures("create_db_instance") def test_trial_working_dir_is_changed(config, monkeypatch): """Check that trial has its working_dir attribute changed.""" - exp = ExperimentBuilder().build_from(config) + exp = experiment_builder.build(**config) trial = tuple_to_trial((1.0,), exp.space) - exp.register_trial(trial) + exp.register_trial(trial, status='reserved') con = Consumer(exp) con.consume(trial) diff --git a/tests/unittests/core/worker/test_experiment.py b/tests/unittests/core/worker/test_experiment.py index 5c55d7ff0..07d85eab9 100644 --- a/tests/unittests/core/worker/test_experiment.py +++ b/tests/unittests/core/worker/test_experiment.py @@ -10,23 +10,17 @@ import pytest -from orion.algo.base import BaseAlgorithm -from orion.algo.space import Space import orion.core -from orion.core.evc.adapters import BaseAdapter -from orion.core.io.database import DuplicateKeyError +from orion.core.io.space_builder import SpaceBuilder import orion.core.utils.backward as backward -from orion.core.utils.exceptions import RaceCondition from 
orion.core.utils.tests import OrionState import orion.core.worker.experiment from orion.core.worker.experiment import Experiment, ExperimentView +from orion.core.worker.primary_algo import PrimaryAlgo from orion.core.worker.trial import Trial from orion.storage.base import get_storage -logging.basicConfig(level=logging.DEBUG) - - @pytest.fixture() def new_config(random_dt): """Create a configuration that will not hit the database.""" @@ -50,12 +44,12 @@ def new_config(random_dt): algorithms={'dumbalgo': {}}, producer={'strategy': 'NoParallelStrategy'}, # attrs starting with '_' also - _id='fasdfasfa', + # _id='fasdfasfa', # and in general anything which is not in Experiment's slots something_to_be_ignored='asdfa' ) - backward.populate_priors(new_config['metadata']) + backward.populate_space(new_config) return new_config @@ -71,7 +65,7 @@ def parent_version_config(): metadata={'user': 'corneauf', 'datetime': datetime.datetime.utcnow(), 'user_args': ['--x~normal(0,1)']}) - backward.populate_priors(config['metadata']) + backward.populate_space(config) return config @@ -85,10 +79,72 @@ def child_version_config(parent_version_config): config['refers'] = {'parent_id': 'parent_config'} config['metadata']['datetime'] = datetime.datetime.utcnow() config['metadata']['user_args'].append('--y~+normal(0,1)') - backward.populate_priors(config['metadata']) + backward.populate_space(config) return config +def _generate(obj, *args, value): + if obj is None: + return None + + obj = copy.deepcopy(obj) + data = obj + + for arg in args[:-1]: + data = data[arg] + + data[args[-1]] = value + return obj + + +base_trial = { + 'experiment': 0, + 'status': 'new', # new, reserved, suspended, completed, broken + 'worker': None, + 'submit_time': '2017-11-23T02:00:00', + 'start_time': None, + 'end_time': None, + 'heartbeat': None, + 'results': [ + {'name': 'loss', + 'type': 'objective', # objective, constraint + 'value': 2} + ], + 'params': [ + {'name': '/encoding_layer', + 'type': 'categorical', + 'value': 'rnn'}, + {'name': '/decoding_layer', + 'type': 'categorical', + 'value': 'lstm_with_attention'} + ] +} + + +def generate_trials(status): + """Generate Trials with different configurations""" + new_trials = [_generate(base_trial, 'status', value=s) for s in status] + + for i, trial in enumerate(new_trials): + if trial['status'] != 'new': + trial['start_time'] = datetime.datetime.utcnow() + datetime.timedelta(seconds=i) + + for i, trial in enumerate(new_trials): + if trial['status'] == 'completed': + trial['end_time'] = datetime.datetime.utcnow() + datetime.timedelta(seconds=i) + + # make each trial unique + for i, trial in enumerate(new_trials): + trial['results'][0]['value'] = i + trial['params'].append({ + 'name': '/index', + 'type': 'categorical', + 'value': i + }) + + return new_trials + + def assert_protocol(exp, create_db_instance): """Transitional method to move away from mongodb""" assert exp._storage._db is create_db_instance @@ -104,613 +160,16 @@ def get_db_from_view(exp): return exp._storage._db._db -class TestInitExperiment(object): - """Create new Experiment instance.""" - - @pytest.mark.usefixtures("with_user_tsirif") - def test_new_experiment_due_to_name(self, create_db_instance, random_dt): - """Hit user name, but exp_name does not hit the db, create new entry.""" - exp = Experiment('supernaekei') - assert exp._init_done is False - assert_protocol(exp, create_db_instance) - assert exp._id is None - assert exp.name == 'supernaekei' - assert exp.refers == {} - assert exp.metadata['user'] == 'tsirif' - 
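+ # For reference, `generate_trials` above yields one trial document per given
+ # status, each sharing `base_trial`'s layout but carrying a unique objective
+ # value and '/index' param so that no two trials hash alike; e.g.:
+ #
+ #     >>> trials = generate_trials(['new', 'completed'])
+ #     >>> [t['status'] for t in trials]
+ #     ['new', 'completed']
+ #     >>> trials[1]['params'][-1]
+ #     {'name': '/index', 'type': 'categorical', 'value': 1}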
assert len(exp.metadata) == 1 - assert exp.pool_size is None - assert exp.max_trials is None - assert exp.algorithms is None - assert exp.working_dir is None - assert exp.version == 1 - with pytest.raises(AttributeError): - exp.this_is_not_in_config = 5 - - @pytest.mark.usefixtures("with_user_tsirif") - def test_existing_experiment(self, create_db_instance, exp_config): - """Hit exp_name + user's name in the db, fetch most recent entry.""" - exp = Experiment('supernaedo2-dendi') - assert exp._init_done is False - assert_protocol(exp, create_db_instance) - assert exp._id == exp_config[0][0]['_id'] - assert exp.name == exp_config[0][0]['name'] - assert exp.refers == exp_config[0][0]['refers'] - assert exp.metadata == exp_config[0][0]['metadata'] - assert exp.pool_size == exp_config[0][0]['pool_size'] - assert exp.max_trials == exp_config[0][0]['max_trials'] - assert exp.algorithms == exp_config[0][0]['algorithms'] - assert exp.working_dir == exp_config[0][0]['working_dir'] - assert exp.version == 1 - with pytest.raises(AttributeError): - exp.this_is_not_in_config = 5 - - def test_new_experiment_wout_version(self, create_db_instance): - """Create a new and never-seen-before experiment without a version.""" - exp = Experiment("exp_wout_version") - assert exp.version == 1 - - def test_new_experiment_w_version(self, create_db_instance): - """Create a new and never-seen-before experiment with a version.""" - exp = Experiment("exp_wout_version", version=1) - assert exp.version == 1 - - def test_backward_compatibility_no_version(self, create_db_instance, parent_version_config, - child_version_config): - """Branch from parent that has no version field.""" - parent_version_config.pop('version') - create_db_instance.write('experiments', parent_version_config) - create_db_instance.write('experiments', child_version_config) - - exp = Experiment("old_experiment", user="corneauf") - assert exp.version == 2 - - def test_old_experiment_wout_version(self, create_db_instance, parent_version_config, - child_version_config): - """Create an already existing experiment without a version.""" - create_db_instance.write('experiments', parent_version_config) - create_db_instance.write('experiments', child_version_config) - - exp = Experiment("old_experiment", user="corneauf") - assert exp.version == 2 - - def test_old_experiment_w_version(self, create_db_instance, parent_version_config, - child_version_config): - """Create an already existing experiment with a version.""" - create_db_instance.write('experiments', parent_version_config) - create_db_instance.write('experiments', child_version_config) - - exp = Experiment("old_experiment", user="corneauf", version=1) - assert exp.version == 1 - - def test_old_experiment_w_version_bigger_than_max(self, create_db_instance, - parent_version_config, child_version_config): - """Create an already existing experiment with a too large version.""" - create_db_instance.write('experiments', parent_version_config) - create_db_instance.write('experiments', child_version_config) - - exp = Experiment("old_experiment", user="corneauf", version=8) - assert exp.version == 2 - - -@pytest.mark.usefixtures("create_db_instance", "with_user_tsirif") -class TestConfigProperty(object): - """Get and set experiment's configuration, finilize initialization process.""" - - def test_get_before_init_has_hit(self, exp_config, random_dt): - """Return a configuration dict according to an experiment object. - - Assuming that experiment's (exp's name, user's name) has hit the database. 
- """ - exp = Experiment('supernaedo2-dendi') - exp_config[0][0].pop('_id') - exp_config[0][0]['version'] = 1 - assert exp.configuration == exp_config[0][0] - - def test_get_before_init_no_hit(self, exp_config, random_dt): - """Return a configuration dict according to an experiment object. - - Before initialization is done, it can be the case that the pair (`name`, - user's name) has not hit the database. return a yaml compliant form - of current state, to be used with :mod:`orion.core.io.resolve_config`. - """ - exp = Experiment('supernaekei') - cfg = exp.configuration - assert cfg['name'] == 'supernaekei' - assert cfg['refers'] == {} - assert cfg['metadata']['user'] == 'tsirif' - assert len(cfg['metadata']) == 1 - assert cfg['pool_size'] is None - assert cfg['max_trials'] is None - assert cfg['algorithms'] is None - assert cfg['working_dir'] is None - assert cfg['version'] == 1 - - @pytest.mark.skip(reason='Interactive prompt problems') - def test_good_set_before_init_hit_with_diffs(self, exp_config): - """Trying to set, and differences were found from the config pulled from db. - - In this case: - 1. Force renaming of experiment, prompt user for new name. - 2. Fork from experiment with previous name. New experiments refers to the - old one, if user wants to. - 3. Overwrite elements with the ones from input. - - .. warning:: Currently, not implemented. - """ - new_config = copy.deepcopy(exp_config[0][1]) - new_config['metadata']['user_version'] = 1.2 - exp = Experiment('supernaedo2') - - exp.configure(new_config) - - def test_good_set_before_init_hit_no_diffs_exc_max_trials(self, exp_config): - """Trying to set, and NO differences were found from the config pulled from db. - - Everything is normal, nothing changes. Experiment is resumed, - perhaps with more trials to evaluate (an exception is 'max_trials'). - """ - exp = Experiment('supernaedo2-dendi') - # Deliver an external configuration to finalize init - exp_config[0][0]['max_trials'] = 5000 - exp.configure(exp_config[0][0]) - exp_config[0][0]['algorithms']['dumbalgo']['done'] = False - exp_config[0][0]['algorithms']['dumbalgo']['judgement'] = None - exp_config[0][0]['algorithms']['dumbalgo']['scoring'] = 0 - exp_config[0][0]['algorithms']['dumbalgo']['suspend'] = False - exp_config[0][0]['algorithms']['dumbalgo']['value'] = 5 - exp_config[0][0]['algorithms']['dumbalgo']['seed'] = None - exp_config[0][0]['producer']['strategy'] = "NoParallelStrategy" - assert exp._id == exp_config[0][0].pop('_id') - assert exp.configuration == exp_config[0][0] - - def test_good_set_before_init_hit_no_diffs_exc_pool_size(self, exp_config): - """Trying to set, and NO differences were found from the config pulled from db. - - Everything is normal, nothing changes. Experiment is resumed, - perhaps with more workers that evaluate (an exception is 'pool_size'). 
- """ - exp = Experiment('supernaedo2-dendi') - # Deliver an external configuration to finalize init - exp_config[0][0]['pool_size'] = 10 - exp.configure(exp_config[0][0]) - exp_config[0][0]['algorithms']['dumbalgo']['done'] = False - exp_config[0][0]['algorithms']['dumbalgo']['judgement'] = None - exp_config[0][0]['algorithms']['dumbalgo']['scoring'] = 0 - exp_config[0][0]['algorithms']['dumbalgo']['suspend'] = False - exp_config[0][0]['algorithms']['dumbalgo']['value'] = 5 - exp_config[0][0]['algorithms']['dumbalgo']['seed'] = None - exp_config[0][0]['producer']['strategy'] = "NoParallelStrategy" - assert exp._id == exp_config[0][0].pop('_id') - assert exp.configuration == exp_config[0][0] - - def test_good_set_before_init_no_hit(self, random_dt, database, new_config): - """Trying to set, overwrite everything from input.""" - exp = Experiment(new_config['name']) - exp.configure(new_config) - assert exp._init_done is True - found_config = list(database.experiments.find({'name': 'supernaekei', - 'metadata.user': 'tsirif'})) - - new_config['metadata']['datetime'] = exp.metadata['datetime'] - - assert len(found_config) == 1 - _id = found_config[0].pop('_id') - assert _id != 'fasdfasfa' - assert exp._id == _id - new_config['refers'] = {} - new_config.pop('_id') - new_config.pop('something_to_be_ignored') - new_config['algorithms']['dumbalgo']['done'] = False - new_config['algorithms']['dumbalgo']['judgement'] = None - new_config['algorithms']['dumbalgo']['scoring'] = 0 - new_config['algorithms']['dumbalgo']['suspend'] = False - new_config['algorithms']['dumbalgo']['value'] = 5 - new_config['algorithms']['dumbalgo']['seed'] = None - new_config['refers'] = {'adapter': [], 'parent_id': None, 'root_id': _id} - assert found_config[0] == new_config - assert exp.name == new_config['name'] - assert exp.configuration['refers'] == new_config['refers'] - assert exp.metadata == new_config['metadata'] - assert exp.pool_size == new_config['pool_size'] - assert exp.max_trials == new_config['max_trials'] - assert exp.working_dir == new_config['working_dir'] - assert exp.version == new_config['version'] - # assert exp.algorithms == new_config['algorithms'] - - def test_working_dir_is_correctly_set(self, database, new_config): - """Check if working_dir is correctly changed.""" - exp = Experiment(new_config['name']) - exp.configure(new_config) - assert exp._init_done is True - database.experiments.update_one({'name': 'supernaekei', 'metadata.user': 'tsirif'}, - {'$set': {'working_dir': './'}}) - found_config = list(database.experiments.find({'name': 'supernaekei', - 'metadata.user': 'tsirif'})) - - found_config = found_config[0] - exp = Experiment(found_config['name']) - exp.configure(found_config) - assert exp.working_dir == './' - - def test_working_dir_works_when_db_absent(self, database, new_config): - """Check if working_dir is correctly when absent from the database.""" - exp = Experiment(new_config['name']) - exp.configure(new_config) - assert exp._init_done is True - database.experiments.update_one({'name': 'supernaekei', 'metadata.user': 'tsirif'}, - {'$unset': {'working_dir': ''}}) - found_config = list(database.experiments.find({'name': 'supernaekei', - 'metadata.user': 'tsirif'})) - - found_config = found_config[0] - exp = Experiment(found_config['name']) - exp.configure(found_config) - assert exp.working_dir is None - - def test_inconsistent_1_set_before_init_no_hit(self, random_dt, new_config): - """Test inconsistent configuration because of name.""" - exp = Experiment(new_config['name']) - 
new_config['name'] = 'asdfaa' - with pytest.raises(ValueError) as exc_info: - exp.configure(new_config) - assert 'inconsistent' in str(exc_info.value) - - def test_inconsistent_2_set_before_init_no_hit(self, random_dt, new_config): - """Test inconsistent configuration because of user.""" - exp = Experiment(new_config['name']) - new_config['metadata']['user'] = 'asdfaa' - with pytest.raises(ValueError) as exc_info: - exp.configure(new_config) - assert 'inconsistent' in str(exc_info.value) - - def test_not_inconsistent_3_set_before_init_no_hit(self, random_dt, new_config): - """Test inconsistent configuration because of datetime.""" - exp = Experiment(new_config['name']) - new_config['metadata']['datetime'] = 123 - exp.configure(new_config) - - def test_get_after_init_plus_hit_no_diffs(self, exp_config): - """Return a configuration dict according to an experiment object. - - Before initialization is done, it can be the case that the pair (`name`, - user's name) has not hit the database. return a yaml compliant form - of current state, to be used with :mod:`orion.core.cli.esolve_config`. - """ - exp = Experiment('supernaedo2-dendi') - # Deliver an external configuration to finalize init - experiment_count_before = count_experiment(exp) - exp.configure(exp_config[0][0]) - assert exp._init_done is True - exp_config[0][0]['algorithms']['dumbalgo']['done'] = False - exp_config[0][0]['algorithms']['dumbalgo']['judgement'] = None - exp_config[0][0]['algorithms']['dumbalgo']['scoring'] = 0 - exp_config[0][0]['algorithms']['dumbalgo']['suspend'] = False - exp_config[0][0]['algorithms']['dumbalgo']['value'] = 5 - exp_config[0][0]['algorithms']['dumbalgo']['seed'] = None - exp_config[0][0]['producer']['strategy'] = "NoParallelStrategy" - assert exp._id == exp_config[0][0].pop('_id') - assert exp.configuration == exp_config[0][0] - assert experiment_count_before == count_experiment(exp) - - def test_instantiation_after_init(self, exp_config): - """Verify that algo, space and refers was instanciated properly""" - exp = Experiment('supernaedo2-dendi') - assert not isinstance(exp.algorithms, BaseAlgorithm) - assert not isinstance(exp.space, Space) - assert not isinstance(exp.refers['adapter'], BaseAdapter) - # Deliver an external configuration to finalize init - exp.configure(exp_config[0][0]) - assert exp._init_done is True - assert isinstance(exp.algorithms, BaseAlgorithm) - assert isinstance(exp.space, Space) - assert isinstance(exp.refers['adapter'], BaseAdapter) - - def test_try_set_after_init(self, exp_config): - """Cannot set a configuration after init (currently).""" - exp = Experiment('supernaedo2') - # Deliver an external configuration to finalize init - exp.configure(exp_config[0][0]) - assert exp._init_done is True - with pytest.raises(RuntimeError) as exc_info: - exp.configure(exp_config[0][0]) - assert 'cannot reset' in str(exc_info.value) - - def test_try_set_after_race_condition(self, exp_config, new_config): - """Cannot set a configuration after init if it looses a race - condition. - - The experiment from process which first writes to db is initialized - properly. The experiment which looses the race condition cannot be - initialized and needs to be rebuilt. 
- """ - exp = Experiment(new_config['name']) - assert exp.id is None - # Another experiment gets configured first - experiment_count_before = count_experiment(exp) - naughty_little_exp = Experiment(new_config['name']) - assert naughty_little_exp.id is None - naughty_little_exp.configure(new_config) - assert naughty_little_exp._init_done is True - assert exp._init_done is False - assert (experiment_count_before + 1) == count_experiment(exp) - - # First experiment won't be able to be configured - with pytest.raises(DuplicateKeyError) as exc_info: - exp.configure(new_config) - - assert 'duplicate key error' in str(exc_info.value) - - assert (experiment_count_before + 1) == count_experiment(exp) - - def test_try_set_after_race_condition_with_hit(self, exp_config, new_config): - """Cannot set a configuration after init if config is built - from no-hit (without up-to-date db info) and new exp is hit - - The experiment from process which first writes to db is initialized - properly. The experiment which looses the race condition cannot be - initialized and needs to be rebuilt. - """ - # Another experiment gets configured first - naughty_little_exp = Experiment(new_config['name']) - assert naughty_little_exp.id is None - experiment_count_before = count_experiment(naughty_little_exp) - naughty_little_exp.configure(copy.deepcopy(new_config)) - assert naughty_little_exp._init_done is True - - exp = Experiment(new_config['name']) - assert exp._init_done is False - assert (experiment_count_before + 1) == count_experiment(exp) - # Experiment with hit won't be able to be configured with config without db info - with pytest.raises(DuplicateKeyError) as exc_info: - exp.configure(new_config) - assert 'Cannot register an existing experiment with a new config' in str(exc_info.value) - - assert (experiment_count_before + 1) == count_experiment(exp) - - new_config['metadata']['datetime'] = naughty_little_exp.metadata['datetime'] - exp = Experiment(new_config['name']) - assert exp._init_done is False - assert (experiment_count_before + 1) == count_experiment(exp) - # New experiment will be able to be configured - exp.configure(new_config) - - assert (experiment_count_before + 1) == count_experiment(exp) - - def test_try_reset_after_race_condition(self, exp_config, new_config): - """Cannot set a configuration after init if it looses a race condition, - but can set it if reloaded. - - The experiment from process which first writes to db is initialized - properly. The experiment which looses the race condition cannot be - initialized and needs to be rebuilt. 
- """ - exp = Experiment(new_config['name']) - # Another experiment gets configured first - experiment_count_before = count_experiment(exp) - naughty_little_exp = Experiment(new_config['name']) - naughty_little_exp.configure(new_config) - assert naughty_little_exp._init_done is True - assert exp._init_done is False - assert (experiment_count_before + 1) == count_experiment(exp) - # First experiment won't be able to be configured - with pytest.raises(DuplicateKeyError) as exc_info: - exp.configure(new_config) - assert 'duplicate key error' in str(exc_info.value) - - # Still not more experiment in DB - assert (experiment_count_before + 1) == count_experiment(exp) - - # Retry configuring the experiment - new_config['metadata']['datetime'] = naughty_little_exp.metadata['datetime'] - exp = Experiment(new_config['name']) - exp.configure(new_config) - assert exp._init_done is True - assert (experiment_count_before + 1) == count_experiment(exp) - assert exp.configuration == naughty_little_exp.configuration - - def test_after_init_algorithms_are_objects(self, exp_config): - """Attribute exp.algorithms become objects after init.""" - exp = Experiment('supernaedo2') - # Deliver an external configuration to finalize init - exp.configure(exp_config[0][0]) - assert isinstance(exp.algorithms, BaseAlgorithm) - - @pytest.mark.skip(reason="To be implemented...") - def test_after_init_refers_are_objects(self, exp_config): - """Attribute exp.refers become objects after init.""" - pass - - def test_algorithm_config_with_just_a_string(self, exp_config): - """Test that configuring an algorithm with just a string is OK.""" - new_config = copy.deepcopy(exp_config[0][2]) - new_config['algorithms'] = 'dumbalgo' - exp = Experiment('supernaedo3') - exp.configure(new_config) - new_config['algorithms'] = dict() - new_config['algorithms']['dumbalgo'] = dict() - new_config['algorithms']['dumbalgo']['done'] = False - new_config['algorithms']['dumbalgo']['judgement'] = None - new_config['algorithms']['dumbalgo']['scoring'] = 0 - new_config['algorithms']['dumbalgo']['suspend'] = False - new_config['algorithms']['dumbalgo']['value'] = 5 - new_config['algorithms']['dumbalgo']['seed'] = None - assert exp._id == new_config.pop('_id') - assert exp.configuration['algorithms'] == new_config['algorithms'] - - @pytest.mark.usefixtures("trial_id_substitution") - def test_status_is_pending_when_increase_max_trials(self, exp_config): - """Attribute exp.algorithms become objects after init.""" - exp = Experiment('supernaedo4') - - # Deliver an external configuration to finalize init - exp.configure(exp_config[0][3]) - - assert exp.is_done - - exp = Experiment('supernaedo4') - # Deliver an external configuration to finalize init - exp_config[0][3]['max_trials'] = 1000 - exp.configure(exp_config[0][3]) +@pytest.fixture() +def space(): + """Build a space object""" + return SpaceBuilder().build({'x': 'uniform(0, 10)'}) - assert not exp.is_done - def test_new_child_with_branch(self): - """Check that experiment is not incremented when branching with a new name.""" - user_args = ['--x~normal(0,1)'] - metadata = dict(user='tsirif', datetime=datetime.datetime.utcnow(), user_args=user_args) - algorithms = {'random': {'seed': None}} - config = dict(name='experiment_test', metadata=metadata, version=1, algorithms=algorithms) - backward.populate_priors(config['metadata']) - - get_storage().create_experiment(config) - original = Experiment('experiment_test', version=1) - - config['branch'] = ['experiment_2'] - config['metadata']['user_args'].pop() - 
config['metadata']['user_args'].append("--z~+normal(0,1)") - backward.populate_priors(config['metadata']) - config['version'] = 1 - exp = Experiment('experiment_test', version=1) - exp.configure(config) - - assert exp.version == 1 - assert '/z' in exp.space - assert exp.refers['parent_id'] == original.id - - def test_no_increment_when_child_exist(self): - """Check that experiment cannot be incremented when asked for v1 while v2 exists.""" - user_args = ['--x~normal(0,1)'] - metadata = dict(user='tsirif', datetime=datetime.datetime.utcnow(), user_args=user_args) - algorithms = {'random': {'seed': None}} - config = dict(name='experiment_test', metadata=metadata, version=1, algorithms=algorithms) - backward.populate_priors(config['metadata']) - - get_storage().create_experiment(config) - parent_id = config.pop('_id') - - config['version'] = 2 - config['metadata']['user_args'].append("--y~+normal(0,1)") - backward.populate_priors(config['metadata']) - config['refers'] = dict(parent_id=parent_id, root_id=parent_id, adapters=[]) - - get_storage().create_experiment(config) - config.pop('_id') - - config['metadata']['user_args'].pop() - config['metadata']['user_args'].append("--z~+normal(0,1)") - backward.populate_priors(config['metadata']) - config['version'] = 1 - config.pop('refers') - exp = Experiment('experiment_test', version=1) - - with pytest.raises(ValueError) as exc_info: - exp.configure(config) - assert 'Configuration is different and generates a branching' in str(exc_info.value) - - def test_old_experiment_wout_version(self, parent_version_config, - child_version_config): - """Create an already existing experiment without a version.""" - algorithm = {'random': {'seed': None}} - parent_version_config['algorithms'] = algorithm - child_version_config['algorithms'] = algorithm - - storage = get_storage() - storage.create_experiment(parent_version_config) - storage.create_experiment(child_version_config) - - exp = Experiment("old_experiment", user="corneauf") - exp.configure(child_version_config) - - assert exp.version == 2 - - def test_old_experiment_w_version(self, parent_version_config, - child_version_config): - """Create an already existing experiment with a version.""" - algorithm = {'random': {'seed': None}} - parent_version_config['algorithms'] = algorithm - child_version_config['algorithms'] = algorithm - - storage = get_storage() - storage.create_experiment(parent_version_config) - storage.create_experiment(child_version_config) - - exp = Experiment("old_experiment", user="corneauf", version=1) - exp.configure(parent_version_config) - - assert exp.version == 1 - - def test_race_condition_w_version(self): - """Test that an experiment loosing the race condition during version increment cannot - be resolved automatically if a version number was specified. - - Note that if we would raise RaceCondition, the conflict would still occur since - the version number fetched will not be the new one from the resolution but the requested - one. Therefore raising and handling RaceCondition would lead to infinite recursion in - the experiment builder. 
- """ - user_args = ['--x~normal(0,1)'] - metadata = dict(user='tsirif', datetime=datetime.datetime.utcnow(), user_args=user_args) - algorithms = {'random': {'seed': None}} - config = dict(name='experiment_test', metadata=metadata, version=1, algorithms=algorithms) - backward.populate_priors(config['metadata']) - - get_storage().create_experiment(config) - parent_id = config.pop('_id') - - looser = Experiment('experiment_test', version=1) - - # Simulate exp2 winning the race condition - config2 = copy.deepcopy(config) - config2['version'] = 2 - config2['metadata']['user_args'].append("--y~+normal(0,1)") - backward.populate_priors(config2['metadata']) - config2['refers'] = dict(parent_id=parent_id, root_id=parent_id, adapters=[]) - get_storage().create_experiment(config2) - - # Now exp3 losses the race condition - config3 = copy.deepcopy(config) - config3['metadata']['user_args'].pop() - config3['metadata']['user_args'].append("--z~+normal(0,1)") - backward.populate_priors(config3['metadata']) - config3['version'] = 1 - - with pytest.raises(ValueError) as exc_info: - looser.configure(config3) - assert 'Configuration is different and generates a branching' in str(exc_info.value) - - def test_race_condition_wout_version(self): - """Test that an experiment loosing the race condition during version increment raises - RaceCondition if version number was not specified. - """ - user_args = ['--x~normal(0,1)'] - metadata = dict(user='tsirif', datetime=datetime.datetime.utcnow(), user_args=user_args) - algorithms = {'random': {'seed': None}} - config = dict(name='experiment_test', metadata=metadata, version=1, algorithms=algorithms) - backward.populate_priors(config['metadata']) - - get_storage().create_experiment(config) - parent_id = config.pop('_id') - - looser = Experiment('experiment_test', version=1) - - # Simulate exp2 winning the race condition - config2 = copy.deepcopy(config) - config2['version'] = 2 - config2['metadata']['user_args'].append("--y~+normal(0,1)") - backward.populate_priors(config2['metadata']) - config2['refers'] = dict(parent_id=parent_id, root_id=parent_id, adapters=[]) - get_storage().create_experiment(config2) - - # Now exp3 losses the race condition - config3 = copy.deepcopy(config) - config3['metadata']['user_args'].pop() - config3['metadata']['user_args'].append("--z~+normal(0,1)") - backward.populate_priors(config3['metadata']) - config3.pop('version') - - with pytest.raises(RaceCondition) as exc_info: - looser.configure(config3) - assert 'There was likely a race condition' in str(exc_info.value) +@pytest.fixture() +def algorithm(space): + """Build a dumb algo object""" + return PrimaryAlgo(space, 'dumbalgo') class TestReserveTrial(object): @@ -719,484 +178,408 @@ class TestReserveTrial(object): @pytest.mark.usefixtures("create_db_instance") def test_reserve_none(self): """Find nothing, return None.""" - exp = Experiment('supernaekei') - trial = exp.reserve_trial() - assert trial is None + with OrionState(experiments=[], trials=[]): + exp = Experiment('supernaekei') + trial = exp.reserve_trial() + assert trial is None - def test_reserve_success(self, exp_config_file, random_dt): + def test_reserve_success(self, random_dt): """Successfully find new trials in db and reserve the first one""" - with OrionState(from_yaml=exp_config_file) as cfg: - exp = cfg.get_experiment('supernaedo2-dendi') + storage_config = {'type': 'legacy', 'database': {'type': 'EphemeralDB'}} + with OrionState(trials=generate_trials(['new', 'reserved']), + storage=storage_config) as cfg: + exp = 
Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] + trial = exp.reserve_trial() + # Trials are sorted according to hash and 'new' gets position second cfg.trials[1]['status'] = 'reserved' cfg.trials[1]['start_time'] = random_dt cfg.trials[1]['heartbeat'] = random_dt assert trial.to_dict() == cfg.trials[1] - def test_reserve_when_exhausted(self, exp_config, hacked_exp): + def test_reserve_when_exhausted(self): """Return None once all the trials have been allocated""" - for _ in range(10): - trial = hacked_exp.reserve_trial() - - assert trial is None - - def test_fix_lost_trials(self, hacked_exp, random_dt): + stati = ['new', 'reserved', 'interrupted', 'completed', 'broken'] + with OrionState(trials=generate_trials(stati)) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] + assert exp.reserve_trial() is not None + assert exp.reserve_trial() is not None + assert exp.reserve_trial() is None + + def test_fix_lost_trials(self): """Test that a running trial with an old heartbeat is set to interrupted.""" - exp_query = {'experiment': hacked_exp.id} - trial = hacked_exp.fetch_trials(exp_query)[0] - heartbeat = random_dt - datetime.timedelta(seconds=180) - - get_storage().set_trial_status(trial, status='reserved', heartbeat=heartbeat) - - def fetch_trials(status='reserved'): - trials = hacked_exp.fetch_trials_by_status(status) - return list(filter(lambda new_trial: new_trial.id in [trial.id], trials)) - - assert len(fetch_trials()) == 1 - - hacked_exp.fix_lost_trials() - - assert len(fetch_trials()) == 0 - - assert len(fetch_trials('interrupted')) == 1 - - def test_fix_only_lost_trials(self, hacked_exp, random_dt): + trial = copy.deepcopy(base_trial) + trial['status'] = 'reserved' + trial['heartbeat'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=360) + with OrionState(trials=[trial]) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] + + assert len(exp.fetch_trials_by_status('reserved')) == 1 + exp.fix_lost_trials() + assert len(exp.fetch_trials_by_status('reserved')) == 0 + + def test_fix_only_lost_trials(self): """Test that an old trial is set to interrupted but not a recent one.""" - exp_query = {'experiment': hacked_exp.id} - trials = hacked_exp.fetch_trials(exp_query) - lost = trials[0] - not_lost = trials[1] + lost_trial, running_trial = generate_trials(['reserved'] * 2) + lost_trial['heartbeat'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=360) + running_trial['heartbeat'] = datetime.datetime.utcnow() - heartbeat = random_dt - datetime.timedelta(seconds=180) + with OrionState(trials=[lost_trial, running_trial]) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] - get_storage().set_trial_status(lost, status='reserved', heartbeat=heartbeat) - get_storage().set_trial_status(not_lost, status='reserved', heartbeat=random_dt) + assert len(exp.fetch_trials_by_status('reserved')) == 2 - def fetch_trials(): - trials = hacked_exp.fetch_trials_by_status('reserved') - return list(filter(lambda trial: trial.id in [lost.id, not_lost.id], trials)) + exp.fix_lost_trials() - assert len(fetch_trials()) == 2 + reserved_trials = exp.fetch_trials_by_status('reserved') + assert len(reserved_trials) == 1 + assert reserved_trials[0].to_dict()['params'] == running_trial['params'] - hacked_exp.fix_lost_trials() + failedover_trials = exp.fetch_trials_by_status('interrupted') + assert len(failedover_trials) == 1 + assert failedover_trials[0].to_dict()['params'] == lost_trial['params'] - 
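+ # The fix_lost_trials tests around here pin down the failover contract: a
+ # trial counts as lost when it is still 'reserved' but its heartbeat is older
+ # than `orion.core.config.worker.heartbeat` seconds. A minimal sketch of that
+ # predicate (assumed, not the storage backend's actual query):
+ #
+ #     threshold = (datetime.datetime.utcnow()
+ #                  - datetime.timedelta(seconds=orion.core.config.worker.heartbeat))
+ #     lost = [t for t in reserved_trials if t.heartbeat < threshold]
+ #     for t in lost:
+ #         storage.set_trial_status(t, status='interrupted')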
assert len(fetch_trials()) == 1 + def test_fix_lost_trials_race_condition(self, monkeypatch, caplog): + """Test that a lost trial fixed by a concurrent process does not cause error.""" + trial = copy.deepcopy(base_trial) + trial['status'] = 'interrupted' + trial['heartbeat'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=360) + with OrionState(trials=[trial]) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] - exp_query['status'] = 'interrupted' + assert len(exp.fetch_trials_by_status('interrupted')) == 1 - assert len(fetch_trials()) == 1 + assert len(exp._storage.fetch_lost_trials(exp)) == 0 - def test_fix_lost_trials_race_condition(self, hacked_exp, random_dt, monkeypatch): - """Test that a lost trial fixed by a concurrent process does not cause error.""" - exp_query = {'experiment': hacked_exp.id} - trial = hacked_exp.fetch_trials(exp_query)[0] - heartbeat = random_dt - datetime.timedelta(seconds=180) + def fetch_lost_trials(self, query): + trial_object = Trial(**trial) + trial_object.status = 'reserved' + return [trial_object] - get_storage().set_trial_status(trial, status='interrupted', heartbeat=heartbeat) + # Force the fetch of a trial marked as reserved (and lost) while actually interrupted + # (as if already failed-over by another process). + with monkeypatch.context() as m: + m.setattr(exp._storage.__class__, 'fetch_lost_trials', fetch_lost_trials) - assert hacked_exp.fetch_trials(exp_query)[0].status == 'interrupted' + assert len(exp._storage.fetch_lost_trials(exp)) == 1 - def fetch_lost_trials(self, query): - trial.status = 'reserved' - return [trial] + with caplog.at_level(logging.DEBUG): + exp.fix_lost_trials() - with monkeypatch.context() as m: - m.setattr(hacked_exp.__class__, 'fetch_trials', fetch_lost_trials) - hacked_exp.fix_lost_trials() + assert caplog.records[-1].levelname == 'DEBUG' + assert caplog.records[-1].msg == 'failed' + assert len(exp.fetch_trials_by_status('interrupted')) == 1 + assert len(exp.fetch_trials_by_status('reserved')) == 0 - def test_fix_lost_trials_configurable_hb(self, hacked_exp, random_dt): + def test_fix_lost_trials_configurable_hb(self): """Test that heartbeat is correctly being configured.""" - exp_query = {'experiment': hacked_exp.id} - trial = hacked_exp.fetch_trials(exp_query)[0] - old_heartbeat_value = orion.core.config.worker.heartbeat - heartbeat = random_dt - datetime.timedelta(seconds=180) + trial = copy.deepcopy(base_trial) + trial['status'] = 'reserved' + trial['heartbeat'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=180) + with OrionState(trials=[trial]) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] - get_storage().set_trial_status(trial, - status='reserved', - heartbeat=heartbeat) + assert len(exp.fetch_trials_by_status('reserved')) == 1 - trials = get_storage().fetch_trial_by_status(hacked_exp, 'reserved') + orion.core.config.worker.heartbeat = 360 - assert trial.id in [t.id for t in trials] + exp.fix_lost_trials() - orion.core.config.worker.heartbeat = 210 - hacked_exp.fix_lost_trials() + assert len(exp.fetch_trials_by_status('reserved')) == 1 - trials = get_storage().fetch_trial_by_status(hacked_exp, 'reserved') + orion.core.config.worker.heartbeat = 180 - assert trial.id in [t.id for t in trials] + exp.fix_lost_trials() - orion.core.config.worker.heartbeat = old_heartbeat_value + assert len(exp.fetch_trials_by_status('reserved')) == 0 -def test_update_completed_trial(hacked_exp, database, random_dt): +def 
test_update_completed_trial(random_dt): """Successfully push a completed trial into database.""" - trial = hacked_exp.reserve_trial() - - results_file = tempfile.NamedTemporaryFile( - mode='w', prefix='results_', suffix='.log', dir='.', delete=True - ) - - # Generate fake result - with open(results_file.name, 'w') as file: - json.dump([{ - 'name': 'loss', - 'type': 'objective', - 'value': 2}], - file + with OrionState(trials=generate_trials(['new'])) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] + + trial = exp.reserve_trial() + + results_file = tempfile.NamedTemporaryFile( + mode='w', prefix='results_', suffix='.log', dir='.', delete=True ) - # -- - hacked_exp.update_completed_trial(trial, results_file=results_file) + # Generate fake result + with open(results_file.name, 'w') as file: + json.dump([{ + 'name': 'loss', + 'type': 'objective', + 'value': 2}], + file + ) + # -- + + exp.update_completed_trial(trial, results_file=results_file) - yo = database.trials.find_one({'_id': trial.id}) + yo = get_storage().fetch_trials(exp)[0].to_dict() - assert len(yo['results']) == len(trial.results) - assert yo['results'][0] == trial.results[0].to_dict() - assert yo['status'] == 'completed' - assert yo['end_time'] == random_dt + assert len(yo['results']) == len(trial.results) + assert yo['results'][0] == trial.results[0].to_dict() + assert yo['status'] == 'completed' + assert yo['end_time'] == random_dt - results_file.close() + results_file.close() @pytest.mark.usefixtures("with_user_tsirif") -def test_register_trials(database, random_dt, hacked_exp): +def test_register_trials(random_dt): """Register a list of newly proposed trials/parameters.""" - hacked_exp._id = 'lalala' # white box hack - trials = [ - Trial(params=[{'name': 'a', 'type': 'integer', 'value': 5}]), - Trial(params=[{'name': 'b', 'type': 'integer', 'value': 6}]), - ] - for trial in trials: - hacked_exp.register_trial(trial) - yo = list(database.trials.find({'experiment': hacked_exp._id})) - assert len(yo) == len(trials) - assert yo[0]['params'] == list(map(lambda x: x.to_dict(), trials[0].params)) - assert yo[1]['params'] == list(map(lambda x: x.to_dict(), trials[1].params)) - assert yo[0]['status'] == 'new' - assert yo[1]['status'] == 'new' - assert yo[0]['submit_time'] == random_dt - assert yo[1]['submit_time'] == random_dt - - -def test_fetch_all_trials(hacked_exp, exp_config, random_dt): + with OrionState(): + exp = Experiment('supernaekei') + exp._id = 0 + + trials = [ + Trial(params=[{'name': 'a', 'type': 'integer', 'value': 5}]), + Trial(params=[{'name': 'b', 'type': 'integer', 'value': 6}]), + ] + for trial in trials: + exp.register_trial(trial) + + yo = list(map(lambda trial: trial.to_dict(), get_storage().fetch_trials(exp))) + assert len(yo) == len(trials) + assert yo[0]['params'] == list(map(lambda x: x.to_dict(), trials[0]._params)) + assert yo[1]['params'] == list(map(lambda x: x.to_dict(), trials[1]._params)) + assert yo[0]['status'] == 'new' + assert yo[1]['status'] == 'new' + assert yo[0]['submit_time'] == random_dt + assert yo[1]['submit_time'] == random_dt + + +def test_fetch_all_trials(): """Fetch a list of all trials""" - query = dict() - trials = hacked_exp.fetch_trials(query) - sorted_exp_config = list( - sorted(exp_config[1][0:7], - key=lambda trial: trial.get('submit_time', datetime.datetime.utcnow()))) + with OrionState(trials=generate_trials(['new', 'reserved', 'completed'])) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] - assert 
len(trials) == 7 - for i in range(7): - assert trials[i].to_dict() == sorted_exp_config[i] + trials = list(map(lambda trial: trial.to_dict(), exp.fetch_trials({}))) + assert trials == cfg.trials -def test_fetch_non_completed_trials(hacked_exp, exp_config): +def test_fetch_non_completed_trials(): """Fetch a list of the trials that are not completed trials.status in ['new', 'interrupted', 'suspended', 'broken'] """ - # Set two of completed trials to broken and reserved to have all possible status - query = {'status': 'completed', 'experiment': hacked_exp.id} - database = get_db_from_view(hacked_exp) - completed_trials = database.trials.find(query) - exp_config[1][0]['status'] = 'broken' - database.trials.update({'_id': completed_trials[0]['_id']}, {'$set': {'status': 'broken'}}) - exp_config[1][2]['status'] = 'reserved' - database.trials.update({'_id': completed_trials[1]['_id']}, {'$set': {'status': 'reserved'}}) - - # Make sure non completed trials and completed trials are set properly for the unit-test - query = {'status': {'$ne': 'completed'}, 'experiment': hacked_exp.id} - non_completed_trials = list(database.trials.find(query)) - assert len(non_completed_trials) == 6 - # Make sure we have all type of status except completed - assert (set(trial['status'] for trial in non_completed_trials) == - set(['new', 'reserved', 'suspended', 'interrupted', 'broken'])) - - trials = hacked_exp.fetch_noncompleted_trials() - assert len(trials) == 6 - - def find_and_compare(trial_config): - """Find the trial corresponding to given config and compare it""" - trial = [trial for trial in trials if trial.id == trial_config['_id']] - assert len(trial) == 1 - trial = trial[0] - assert trial.to_dict() == trial_config - - find_and_compare(exp_config[1][0]) - find_and_compare(exp_config[1][2]) - find_and_compare(exp_config[1][3]) - find_and_compare(exp_config[1][4]) - find_and_compare(exp_config[1][5]) - find_and_compare(exp_config[1][6]) - - -def test_is_done_property(hacked_exp): - """Check experiment stopping conditions for maximum number of trials completed.""" - assert hacked_exp.is_done is False - hacked_exp.max_trials = 2 - assert hacked_exp.is_done is True - - -def test_is_done_property_with_algo(hacked_exp): - """Check experiment stopping conditions for algo which converged.""" - # Configure experiment to have instantiated algo - hacked_exp.configure(hacked_exp.configuration) - assert hacked_exp.is_done is False - hacked_exp.algorithms.algorithm.done = True - assert hacked_exp.is_done is True - - -def test_broken_property(hacked_exp): - """Check experiment stopping conditions for maximum number of broken.""" - assert not hacked_exp.is_broken - MAX_BROKEN = 3 - orion.core.config.worker.max_broken = MAX_BROKEN - trials = hacked_exp.fetch_trials()[:MAX_BROKEN] + non_completed_stati = ['new', 'interrupted', 'suspended', 'reserved'] + stati = non_completed_stati + ['completed'] + with OrionState(trials=generate_trials(stati)) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] - for trial in trials: - get_storage().set_trial_status(trial, status='broken') + trials = exp.fetch_noncompleted_trials() + assert len(trials) == 4 + assert set(trial.status for trial in trials) == set(non_completed_stati) - assert hacked_exp.is_broken +def test_is_done_property_with_pending(algorithm): + """Check experiment stopping conditions when there are pending trials.""" + completed = ['completed'] * 10 + reserved = ['reserved'] * 5 + with OrionState(trials=generate_trials(completed + reserved)) as
cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] -def test_configurable_broken_property(hacked_exp): - """Check if max_broken changes after configuration.""" - assert not hacked_exp.is_broken - MAX_BROKEN = 3 - orion.core.config.worker.max_broken = MAX_BROKEN - trials = hacked_exp.fetch_trials()[:MAX_BROKEN] + exp.algorithms = algorithm + exp.max_trials = 10 - for trial in trials: - get_storage().set_trial_status(trial, status='broken') + assert exp.is_done - assert hacked_exp.is_broken + exp.max_trials = 15 - orion.core.config.worker.max_broken += 1 + # There are only 10 completed trials - assert not hacked_exp.is_broken + assert not exp.is_done + exp.algorithms.algorithm.done = True -def test_experiment_stats(hacked_exp, exp_config, random_dt): - """Check that property stats is returning a proper summary of experiment's results.""" - stats = hacked_exp.stats - assert stats['trials_completed'] == 3 - assert stats['best_trials_id'] == exp_config[1][2]['_id'] - assert stats['best_evaluation'] == 2 - assert stats['start_time'] == exp_config[0][4]['metadata']['datetime'] - assert stats['finish_time'] == exp_config[1][1]['end_time'] - assert stats['duration'] == stats['finish_time'] - stats['start_time'] - assert len(stats) == 6 - - + # Algorithm is done but 5 trials are pending + assert not exp.is_done -class TestInitExperimentView(object): - """Create new ExperimentView instance.""" - - @pytest.mark.usefixtures("with_user_tsirif") - def test_empty_experiment_view(self): - """Hit user name, but exp_name does not hit the db.""" - with pytest.raises(ValueError) as exc_info: - ExperimentView('supernaekei') - assert ("No experiment with given name 'supernaekei' for user 'tsirif'" - in str(exc_info.value)) - - @pytest.mark.usefixtures("with_user_tsirif") - def test_existing_experiment_view(self, create_db_instance, exp_config): - """Hit exp_name + user's name in the db, fetch most recent entry.""" - exp = ExperimentView('supernaedo2-dendi') - assert exp._experiment._init_done is False - - assert exp._id == exp_config[0][0]['_id'] - assert exp.name == exp_config[0][0]['name'] - assert exp.configuration['refers'] == exp_config[0][0]['refers'] - assert exp.metadata == exp_config[0][0]['metadata'] - assert exp.pool_size == exp_config[0][0]['pool_size'] - assert exp.max_trials == exp_config[0][0]['max_trials'] - assert exp.version == exp_config[0][0]['version'] - assert isinstance(exp.refers['adapter'], BaseAdapter) - # TODO: Views are not fully configured until configuration is refactored - # assert exp.algorithms.configuration == exp_config[0][0]['algorithms'] - with pytest.raises(AttributeError): - exp.this_is_not_in_config = 5 +def test_is_done_property_no_pending(algorithm): + """Check experiment stopping conditions when there are no pending trials.""" + completed = ['completed'] * 10 + broken = ['broken'] * 5 + with OrionState(trials=generate_trials(completed + broken)) as cfg: + exp = Experiment('supernaekei') + exp._id = cfg.trials[0]['experiment'] - # Test that experiment.update_completed_trial indeed exists - exp._experiment.update_completed_trial - with pytest.raises(AttributeError): - exp.update_completed_trial + exp.algorithms = algorithm - with pytest.raises(AttributeError): - exp.register_trial + exp.max_trials = 15 - with pytest.raises(AttributeError): - exp.reserve_trial + # There are only 10 completed trials and the algo is not done.
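+ # A sketch of the stopping rule the two `is_done` tests encode (assumed
+ # attribute names, not the property's actual code):
+ #
+ #     pending = (exp.fetch_trials_by_status('reserved')
+ #                + exp.fetch_trials_by_status('new'))
+ #     completed = exp.fetch_trials_by_status('completed')
+ #     is_done = (len(completed) >= exp.max_trials
+ #                or (exp.algorithms.is_done and not pending))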
+        assert not exp.is_done
+
+        exp.algorithms.algorithm.done = True
+
+        # Algorithm is done and no pending trials
+        assert exp.is_done
+
+
+def test_broken_property():
+    """Check experiment stopping conditions for maximum number of broken."""
+    MAX_BROKEN = 3
+    orion.core.config.worker.max_broken = MAX_BROKEN
+
+    stati = (['reserved'] * 10) + (['broken'] * (MAX_BROKEN - 1))
+    with OrionState(trials=generate_trials(stati)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+
+        assert not exp.is_broken

-    @pytest.mark.skip(reason='Views are not fully configured until configuration is refactored')
-    @pytest.mark.usefixtures("with_user_tsirif", "create_db_instance")
-    def test_experiment_view_not_modified(self, exp_config, monkeypatch):
-        """Experiment should not be modified if fetched in another verion of Oríon.
+    stati = (['reserved'] * 10) + (['broken'] * (MAX_BROKEN))
+    with OrionState(trials=generate_trials(stati)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+
+        assert exp.is_broken

-        When loading a view the original config is used to configure the experiment, but
-        this process may modify the config if the version of Oríon is different. This should not be
-        saved in database.
-        """
-        terrible_message = 'oh no, I have been modified!'
-        original_configuration = ExperimentView('supernaedo2').configuration

-        def modified_configuration(self):
-            mocked_config = copy.deepcopy(original_configuration)
-            mocked_config['metadata']['datetime'] = terrible_message
-            return mocked_config
+def test_configurable_broken_property():
+    """Check if max_broken changes after configuration."""
+    MAX_BROKEN = 3
+    orion.core.config.worker.max_broken = MAX_BROKEN

-        with monkeypatch.context() as m:
-            m.setattr(Experiment, 'configuration', property(modified_configuration))
-            exp = ExperimentView('supernaedo2')
+    stati = (['reserved'] * 10) + (['broken'] * (MAX_BROKEN))
+    with OrionState(trials=generate_trials(stati)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']

-            # The mock is still in place and overwrites the configuration
-            assert exp.configuration['metadata']['datetime'] == terrible_message
+        assert exp.is_broken

-        # The mock is reverted and original config is returned, but modification is still in
-        # metadata
-        assert exp.metadata['datetime'] == terrible_message
+        orion.core.config.worker.max_broken += 1

-        # Loading again from DB confirms the DB was not overwritten
-        reloaded_exp = ExperimentView('supernaedo2')
-        assert reloaded_exp.configuration['metadata']['datetime'] != terrible_message
+        assert not exp.is_broken


-def test_fetch_completed_trials_from_view(hacked_exp, exp_config, random_dt):
+def test_experiment_stats():
+    """Check that the stats property returns a proper summary of the experiment's results."""
+    NUM_COMPLETED = 3
+    stati = (['completed'] * NUM_COMPLETED) + (['reserved'] * 2)
+    with OrionState(trials=generate_trials(stati)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+        exp.metadata = {'datetime': datetime.datetime.utcnow()}
+        stats = exp.stats
+        assert stats['trials_completed'] == NUM_COMPLETED
+        assert stats['best_trials_id'] == cfg.trials[3]['_id']
+        assert stats['best_evaluation'] == 0
+        assert stats['start_time'] == exp.metadata['datetime']
+        assert stats['finish_time'] == cfg.trials[0]['end_time']
+        assert stats['duration'] == stats['finish_time'] - stats['start_time']
+        assert len(stats) == 6
+
+
+def test_fetch_completed_trials_from_view():
+    """Fetch a list of the unseen yet completed trials."""
-    experiment_view = ExperimentView(hacked_exp.name)
-    experiment_view._experiment = hacked_exp
+    non_completed_stati = ['new', 'interrupted', 'suspended', 'reserved']
+    stati = non_completed_stati + ['completed']
+    with OrionState(trials=generate_trials(stati)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+        exp_view = ExperimentView(exp)
+
+        trials = exp_view.fetch_trials_by_status('completed')
+        assert len(trials) == 1
+        assert trials[0].status == 'completed'
+

-    trials = experiment_view.fetch_trials_by_status('completed')
-    assert len(trials) == 3
-    assert trials[0].to_dict() == exp_config[1][0]
-    assert trials[1].to_dict() == exp_config[1][2]
-    assert trials[2].to_dict() == exp_config[1][1]
+def test_view_is_done_property_with_pending(algorithm):
+    """Check experiment stopping conditions from view when there are pending trials."""
+    completed = ['completed'] * 10
+    reserved = ['reserved'] * 5
+    with OrionState(trials=generate_trials(completed + reserved)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+        exp.algorithms = algorithm
+        exp.max_trials = 10
+
+        exp_view = ExperimentView(exp)
+        assert exp_view.is_done

-def test_view_is_done_property(hacked_exp):
-    """Check experiment stopping conditions accessed from view."""
-    experiment_view = ExperimentView(hacked_exp.name)
-    experiment_view._experiment = hacked_exp
+        exp.max_trials = 15

-    # Fully configure wrapper experiment (should normally occur inside ExperimentView.__init__
-    # but hacked_exp has been _hacked_ inside afterwards.
-    hacked_exp.configure(hacked_exp.configuration)
+        # There are only 10 completed trials
+        assert not exp_view.is_done

-    assert experiment_view.is_done is False
+        exp.algorithms.algorithm.done = True

-    with pytest.raises(AttributeError):
-        experiment_view.max_trials = 2
+        # Algorithm is done but 5 trials are pending
+        assert not exp_view.is_done

-    hacked_exp.max_trials = 2

-    assert experiment_view.is_done is True
+def test_view_is_done_property_no_pending(algorithm):
+    """Check experiment stopping conditions from view when there are no pending trials."""
+    completed = ['completed'] * 10
+    broken = ['broken'] * 5
+    with OrionState(trials=generate_trials(completed + broken)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+        exp.algorithms = algorithm
+        exp.max_trials = 100
+        exp_view = ExperimentView(exp)

-def test_view_algo_is_done_property(hacked_exp):
-    """Check experiment's algo stopping conditions accessed from view."""
-    experiment_view = ExperimentView(hacked_exp.name)
-    experiment_view._experiment = hacked_exp

-    # Fully configure wrapper experiment (should normally occur inside ExperimentView.__init__
-    # but hacked_exp has been _hacked_ inside afterwards.
-    hacked_exp.configure(hacked_exp.configuration)
+        exp.max_trials = 15

-    assert experiment_view.is_done is False
+        # There are only 10 completed trials and algo not done.
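+        # (the view should report the same is_done value as the wrapped experiment)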
+        assert not exp_view.is_done

-    hacked_exp.algorithms.algorithm.done = True
+        exp.algorithms.algorithm.done = True

-    assert experiment_view.is_done is True
+        # Algorithm is done and no pending trials
+        assert exp_view.is_done


-def test_experiment_view_stats(hacked_exp, exp_config, random_dt):
+def test_experiment_view_stats():
    """Check that property stats from view is consistent."""
-    experiment_view = ExperimentView(hacked_exp.name)
-    experiment_view._experiment = hacked_exp
+    NUM_COMPLETED = 3
+    stati = (['completed'] * NUM_COMPLETED) + (['reserved'] * 2)
+    with OrionState(trials=generate_trials(stati)) as cfg:
+        exp = Experiment('supernaekei')
+        exp._id = cfg.trials[0]['experiment']
+        exp.metadata = {'datetime': datetime.datetime.utcnow()}

-    stats = experiment_view.stats
-    assert stats['trials_completed'] == 3
-    assert stats['best_trials_id'] == exp_config[1][2]['_id']
-    assert stats['best_evaluation'] == 2
-    assert stats['start_time'] == exp_config[0][4]['metadata']['datetime']
-    assert stats['finish_time'] == exp_config[1][1]['end_time']
-    assert stats['duration'] == stats['finish_time'] - stats['start_time']
-    assert len(stats) == 6
+        exp_view = ExperimentView(exp)
+
+        stats = exp_view.stats
+        assert stats['trials_completed'] == NUM_COMPLETED
+        assert stats['best_trials_id'] == cfg.trials[3]['_id']
+        assert stats['best_evaluation'] == 0
+        assert stats['start_time'] == exp_view.metadata['datetime']
+        assert stats['finish_time'] == cfg.trials[0]['end_time']
+        assert stats['duration'] == stats['finish_time'] - stats['start_time']
+        assert len(stats) == 6


-@pytest.mark.usefixtures("with_user_tsirif")
def test_experiment_view_protocol_read_only():
    """Verify that wrapper experiments' protocol is read-only"""
-    exp = ExperimentView('supernaedo2')
-
-    # Test that _protocol.set_trial_status indeed exists
-    exp._experiment._storage._storage.set_trial_status
-
-    with pytest.raises(AttributeError):
-        exp._experiment._storage.set_trial_status
-
-
-class TestInitExperimentWithEVC(object):
-    """Create new Experiment instance with EVC."""
-
-    @pytest.mark.usefixtures("with_user_tsirif")
-    def test_new_experiment_with_parent(self, create_db_instance, random_dt, exp_config):
-        """Configure a branch experiment."""
-        exp = Experiment('supernaedo2.6')
-        exp.metadata = exp_config[0][4]['metadata']
-        exp.refers = exp_config[0][4]['refers']
-        exp.algorithms = exp_config[0][4]['algorithms']
-        exp.configure(exp.configuration)
-        assert exp._init_done is True
-        assert_protocol(exp, create_db_instance)
-        assert exp._id is not None
-        assert exp.name == 'supernaedo2.6'
-        assert exp.configuration['refers'] == exp_config[0][4]['refers']
-        exp_config[0][4]['metadata']['datetime'] = random_dt
-        assert exp.metadata == exp_config[0][4]['metadata']
-        assert exp.pool_size is None
-        assert exp.max_trials is None
-        assert exp.version == 1
-        assert exp.configuration['algorithms'] == {'random': {'seed': None}}
-
-    @pytest.mark.usefixtures("with_user_tsirif")
-    def test_experiment_with_parent(self, create_db_instance, random_dt, exp_config):
-        """Configure an existing experiment with parent."""
-        exp = Experiment('supernaedo2.1')
-        exp.algorithms = {'random': {'seed': None}}
-        exp.configure(exp.configuration)
-        assert exp._init_done is True
-        assert_protocol(exp, create_db_instance)
-        assert exp._id is not None
-        assert exp.name == 'supernaedo2.1'
-        assert exp.configuration['refers'] == exp_config[0][4]['refers']
-        assert exp.metadata == exp_config[0][4]['metadata']
-        assert exp.pool_size == 2
-        assert exp.max_trials == 1000
-        assert exp.version == 1
-        assert exp.configuration['algorithms'] == {'random': {'seed': None}}
-
-    @pytest.mark.usefixtures("with_user_tsirif")
-    def test_experiment_non_interactive_branching(self, create_db_instance, random_dt, exp_config,
-                                                  monkeypatch):
-        """Configure an existing experiment with parent."""
-        def _patch_fetch(config):
-            return {'manual_resolution': True}
-
-        monkeypatch.setattr(orion.core.worker.experiment,
-                            "fetch_branching_configuration", _patch_fetch)
-        with monkeypatch.context() as ctx:
-            ctx.setattr('sys.__stdin__.isatty', lambda: True)
-            exp = Experiment('supernaedo2.1')
-            exp.algorithms = {'dumbalgo': {}}
-            with pytest.raises(OSError):
-                exp.configure(exp.configuration)
-
-        with pytest.raises(ValueError) as exc_info:
-            exp.configure(exp.configuration)
-        assert "Configuration is different and generates a branching" in str(exc_info.value)
+    with OrionState():
+        exp = Experiment('supernaekei')
+
+        exp_view = ExperimentView(exp)
+
+        # Test that _protocol.set_trial_status indeed exists
+        exp_view._experiment._storage._storage.set_trial_status
+
+        with pytest.raises(AttributeError):
+            exp_view._experiment._storage.set_trial_status
diff --git a/tests/unittests/core/test_producer.py b/tests/unittests/core/worker/test_producer.py
similarity index 86%
rename from tests/unittests/core/test_producer.py
rename to tests/unittests/core/worker/test_producer.py
index 743a12646..557faa81a 100644
--- a/tests/unittests/core/test_producer.py
+++ b/tests/unittests/core/worker/test_producer.py
@@ -7,6 +7,9 @@
import pytest

+from orion.core.io.experiment_builder import build
+from orion.core.utils.exceptions import SampleTimeout, WaitingForTrials
+from orion.core.utils.format_trials import trial_to_tuple
from orion.core.worker.producer import Producer
from orion.core.worker.trial import Trial

@@ -47,25 +50,26 @@ def update_naive_algorithm(producer):

@pytest.fixture()
-def producer(hacked_exp, random_dt, exp_config, categorical_values):
+def producer(monkeypatch, hacked_exp, random_dt, exp_config, categorical_values):
    """Return a setup `Producer`."""
    # make init done
-    # TODO: Remove this commented out if test pass
-    # hacked_exp.configure(exp_config[0][3])
-    # # insert fake point
-    # fake_point = ('gru', 'rnn')
-    # assert fake_point in hacked_exp.space
-    # hacked_exp.algorithms.algorithm.value = fake_point
-
-    hacked_exp.configure(exp_config[0][0])
    hacked_exp.pool_size = 1
    hacked_exp.algorithms.algorithm.possible_values = categorical_values
    hacked_exp.algorithms.seed_rng(0)
    hacked_exp.producer['strategy'] = DumbParallelStrategy()

-    return Producer(hacked_exp)
+    producer = Producer(hacked_exp)
+
+    def backoff(self):
+        """Don't wait, just update."""
+        self.update()
+        self.failure_count += 1
+
+    monkeypatch.setattr(Producer, 'backoff', backoff)
+
+    return producer


def test_algo_observe_completed(producer):
@@ -546,11 +550,11 @@ def test_exceed_max_idle_time_because_of_duplicates(producer, database, random_d
    producer.update()

    start = time.time()
-    with pytest.raises(RuntimeError) as exc_info:
+
+    with pytest.raises(SampleTimeout):
        producer.produce()
-    assert timeout <= time.time() - start < timeout + 1
-    assert "Algorithm could not sample new points" in str(exc_info.value)
+    assert timeout <= time.time() - start < timeout + 1


def test_exceed_max_idle_time_because_of_optout(producer, database, random_dt, monkeypatch):
@@ -569,12 +573,8 @@ def opt_out(self, num=1):

    producer.update()

-    start = time.time()
-    with pytest.raises(RuntimeError) as exc_info:
+    with pytest.raises(WaitingForTrials):
        producer.produce()

-    assert timeout <= time.time() - start < timeout + 1
-
-    assert "Algorithm could not sample new points" in str(exc_info.value)


def test_stops_if_algo_done(producer, database, random_dt, monkeypatch):
@@ -627,3 +627,82 @@ def test_original_seeding(producer, database):

    assert prev_suggested != producer.algorithm.algorithm._suggested
    assert prev_index < producer.algorithm.algorithm._index
+
+
+def test_evc(monkeypatch, producer):
+    """Verify that the producer uses the available trials from the EVC tree"""
+    experiment = producer.experiment
+    new_experiment = build(experiment.name, algorithms='random')
+
+    # Replace parent with hacked exp, otherwise parent ID does not match trials in DB
+    # and fetch_trials() won't return anything.
+    new_experiment._node.parent._item = experiment
+
+    assert len(new_experiment.fetch_trials(with_evc_tree=True)) == len(experiment.fetch_trials())
+
+    producer.experiment = new_experiment
+
+    def update_algo(trials):
+        assert len(trials) == 3
+
+    def update_naive_algo(trials):
+        assert len(trials) == 4
+
+    monkeypatch.setattr(producer, '_update_algorithm', update_algo)
+    monkeypatch.setattr(producer, '_update_naive_algorithm', update_naive_algo)
+
+    producer.update()
+
+
+def test_evc_duplicates(monkeypatch, producer):
+    """Verify that the producer won't register samples already in the parent experiment"""
+    experiment = producer.experiment
+    new_experiment = build(experiment.name, algorithms='random')
+
+    # Replace parent with hacked exp, otherwise parent ID does not match trials in DB
+    # and fetch_trials() won't return anything.
+    new_experiment._node.parent._item = experiment
+
+    assert len(new_experiment.fetch_trials(with_evc_tree=True)) == len(experiment.fetch_trials())
+
+    def suggest(pool_size):
+        return [trial_to_tuple(experiment.fetch_trials()[-1], experiment.space)]
+
+    producer.experiment = new_experiment
+    producer.algorithm = new_experiment.algorithms
+    producer.max_idle_time = 1
+
+    monkeypatch.setattr(new_experiment.algorithms, 'suggest', suggest)
+
+    producer.update()
+    with pytest.raises(SampleTimeout):
+        producer.produce()
+
+    assert len(new_experiment.fetch_trials(with_evc_tree=False)) == 0
+
+
+def test_algorithm_is_done(monkeypatch, producer):
+    """Verify that producer won't register new samples if algorithm is done meanwhile."""
+    producer.experiment.max_trials = 8
+    producer.experiment.pool_size = 10
+    # Reset Producer to test that max_trials is set properly during init.
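+    # (the fresh Producer below must pick up max_trials=8 from the experiment configured above)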
+    producer = Producer(producer.experiment)
+
+    def suggest_one_only(self, num=1):
+        """Return only one point, whatever `num` is"""
+        return [('gru', 'rnn')]
+
+    monkeypatch.delattr(producer.experiment.algorithms.algorithm.__class__, 'is_done')
+    monkeypatch.setattr(producer.experiment.algorithms.algorithm.__class__, 'suggest',
+                        suggest_one_only)
+
+    assert producer.experiment.pool_size == 10
+    trials_in_exp_before = len(producer.experiment.fetch_trials())
+    assert trials_in_exp_before == producer.experiment.max_trials - 1
+
+    producer.update()
+    producer.produce()
+
+    assert len(producer.experiment.fetch_trials()) == producer.experiment.max_trials
+    assert producer.naive_algorithm.is_done
+    assert not producer.experiment.is_done
diff --git a/tests/unittests/core/worker/test_trial_pacemaker.py b/tests/unittests/core/worker/test_trial_pacemaker.py
index 2a9819572..9f07bbb08 100644
--- a/tests/unittests/core/worker/test_trial_pacemaker.py
+++ b/tests/unittests/core/worker/test_trial_pacemaker.py
@@ -6,7 +6,7 @@
import pytest

-from orion.core.io.experiment_builder import ExperimentBuilder
+import orion.core.io.experiment_builder as experiment_builder
from orion.core.utils.format_trials import tuple_to_trial
from orion.core.worker.trial_pacemaker import TrialPacemaker
from orion.storage.base import get_storage
@@ -16,7 +16,7 @@ def config(exp_config):
    """Return a configuration."""
    config = exp_config[0][0]
-    config['metadata']['user_args'] = ['--x~uniform(-50, 50)']
+    config['space'] = {'x': 'uniform(-50, 50)'}
    config['name'] = 'exp'
    return config

@@ -24,7 +24,7 @@
@pytest.fixture
def exp(config):
    """Return an Experiment."""
-    return ExperimentBuilder().build_from(config)
+    return experiment_builder.build(**config)


@pytest.fixture
diff --git a/tests/unittests/storage/test_legacy.py b/tests/unittests/storage/test_legacy.py
index 218c3f914..b6e5d9678 100644
--- a/tests/unittests/storage/test_legacy.py
+++ b/tests/unittests/storage/test_legacy.py
@@ -1,6 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Collection of tests for :mod:`orion.storage`."""
+import copy
+import json
+import logging
+import tempfile
+
+import pytest
+
+from orion.core.io.database import Database
+from orion.core.io.database.pickleddb import PickledDB
+from orion.core.utils import SingletonAlreadyInstantiatedError, SingletonNotInstantiatedError
+from orion.core.utils.exceptions import MissingResultFile
+from orion.core.utils.tests import OrionState, update_singletons
+from orion.core.worker.trial import Trial
+from orion.storage.base import FailedUpdate
+from orion.storage.legacy import get_database, setup_database
+
+
+log = logging.getLogger(__name__)
+log.setLevel(logging.WARNING)


base_experiment = {
@@ -13,19 +32,196 @@
    }
}

-db_backends = [
-    # {
-    #     'type': 'PickledDB',
-    #     'name': 'orion_test'
-    # },
-    # {
-    #     'type': 'EphemeralDB',
-    #     'name': 'orion_test'
-    # },
-    {
+base_trial = {
+    'experiment': 'default_name',
+    'status': 'new',  # new, reserved, suspended, completed, broken
+    'worker': None,
+    'submit_time': '2017-11-23T02:00:00',
+    'start_time': None,
+    'end_time': None,
+    'heartbeat': None,
+    'results': [
+        {'name': 'loss',
+         'type': 'objective',  # objective, constraint
+         'value': 2}
+    ],
+    'params': [
+        {'name': '/encoding_layer',
+         'type': 'categorical',
+         'value': 'rnn'},
+        {'name': '/decoding_layer',
+         'type': 'categorical',
+         'value': 'lstm_with_attention'}
+    ]
+}
+
+
+mongodb_config = {
+    'database': {
        'type': 'MongoDB',
        'name': 'orion_test',
        'username': 'user',
        'password': 'pass'
    }
+}
+
+db_backends = [
+    {
+        'type': 'legacy',
+        'database': mongodb_config
+    }
]


+@pytest.mark.usefixtures("setup_pickleddb_database")
+def test_setup_database_default(monkeypatch):
+    """Test that the database is set up using the default config"""
+    update_singletons()
+    setup_database()
+    database = Database()
+    assert isinstance(database, PickledDB)
+
+
+def test_setup_database_bad():
+    """Test how setup fails when configuring with non-existent backends"""
+    update_singletons()
+    with pytest.raises(NotImplementedError) as exc:
+        setup_database({'type': 'idontexist'})
+
+    assert exc.match('idontexist')
+
+
+def test_setup_database_custom():
+    """Test setup with local configuration"""
+    update_singletons()
+    setup_database({'type': 'pickleddb', 'host': 'test.pkl'})
+    database = Database()
+    assert isinstance(database, PickledDB)
+    assert database.host == 'test.pkl'
+
+
+def test_setup_database_bad_override():
+    """Test setup with different type than existing singleton"""
+    update_singletons()
+    setup_database({'type': 'pickleddb', 'host': 'test.pkl'})
+    database = Database()
+    assert isinstance(database, PickledDB)
+    with pytest.raises(SingletonAlreadyInstantiatedError) as exc:
+        setup_database({'type': 'mongodb'})
+
+    assert exc.match(r'A singleton instance of \(type: Database\)')
+
+
+@pytest.mark.xfail(reason='Fix this when introducing #135 in v0.2.0')
+def test_setup_database_bad_config_override():
+    """Test setup with different config than existing singleton"""
+    update_singletons()
+    setup_database({'type': 'pickleddb', 'host': 'test.pkl'})
+    database = Database()
+    assert isinstance(database, PickledDB)
+    with pytest.raises(SingletonAlreadyInstantiatedError):
+        setup_database({'type': 'pickleddb', 'host': 'other.pkl'})
+
+
+def test_get_database_uninitiated():
+    """Test that get_database fails if no database singleton exists"""
+    update_singletons()
+    with pytest.raises(SingletonNotInstantiatedError) as exc:
+        get_database()
+
+    assert exc.match(r'No singleton instance of \(type: Database\) was created')
+
+
+def test_get_database():
+    """Test that get_database gets the singleton"""
+    update_singletons()
+    setup_database({'type': 'pickleddb', 'host': 'test.pkl'})
+    database = get_database()
+    assert isinstance(database, PickledDB)
+    assert get_database() == database
+
+
+class TestLegacyStorage:
+    """Test the Legacy storage retrieve_result mechanism separately"""
+
+    def test_push_trial_results(self, storage=None):
+        """Successfully push a completed trial into database."""
+        reserved_trial = copy.deepcopy(base_trial)
+        reserved_trial['status'] = 'reserved'
+        with OrionState(experiments=[], trials=[reserved_trial], storage=storage) as cfg:
+            storage = cfg.storage()
+            trial = storage.get_trial(Trial(**reserved_trial))
+            results = [
+                Trial.Result(name='loss', type='objective', value=2)
+            ]
+            trial.results = results
+            assert storage.push_trial_results(trial), 'should update successfully'
+
+            trial2 = storage.get_trial(trial)
+            assert trial2.results == results
+
+    def test_push_trial_results_unreserved(self, storage=None):
+        """Fail to push results for a trial that is not reserved."""
+        with OrionState(experiments=[], trials=[base_trial], storage=storage) as cfg:
+            storage = cfg.storage()
+            trial = storage.get_trial(Trial(**base_trial))
+            results = [
+                Trial.Result(name='loss', type='objective', value=2)
+            ]
+            trial.results = results
+            with pytest.raises(FailedUpdate):
+                storage.push_trial_results(trial)

+    def retrieve_result(self, storage, generated_result):
+        """Helper for testing retrieve_result with a generated results file"""
result""" + results_file = tempfile.NamedTemporaryFile( + mode='w', prefix='results_', suffix='.log', dir='.', delete=True + ) + + # Generate fake result + with open(results_file.name, 'w') as file: + json.dump([generated_result], file) + # -- + with OrionState(experiments=[], trials=[], storage=storage) as cfg: + storage = cfg.storage() + + trial = Trial(**base_trial) + trial = storage.retrieve_result(trial, results_file) + + results = trial.results + + assert len(results) == 1 + assert results[0].to_dict() == generated_result + + def test_retrieve_result(self, storage=None): + """Test retrieve result""" + self.retrieve_result(storage, generated_result={ + 'name': 'loss', + 'type': 'objective', + 'value': 2}) + + def test_retrieve_result_incorrect_value(self, storage=None): + """Test retrieve result""" + with pytest.raises(ValueError) as exec: + self.retrieve_result(storage, generated_result={ + 'name': 'loss', + 'type': 'objective_unsupported_type', + 'value': 2}) + + assert exec.match(r'Given type, objective_unsupported_type') + + def test_retrieve_result_nofile(self, storage=None): + """Test retrieve result""" + results_file = tempfile.NamedTemporaryFile( + mode='w', prefix='results_', suffix='.log', dir='.', delete=True + ) + + with OrionState(experiments=[], trials=[], storage=storage) as cfg: + storage = cfg.storage() + + trial = Trial(**base_trial) + + with pytest.raises(MissingResultFile) as exec: + storage.retrieve_result(trial, results_file) + + assert exec.match(r'Cannot parse result file') diff --git a/tests/unittests/storage/test_storage.py b/tests/unittests/storage/test_storage.py index 1c3141902..e4d893069 100644 --- a/tests/unittests/storage/test_storage.py +++ b/tests/unittests/storage/test_storage.py @@ -4,26 +4,43 @@ import copy import datetime -import json -import tempfile +import logging +import time import pytest +import orion.core from orion.core.io.database import DuplicateKeyError -from orion.core.utils.tests import OrionState +from orion.core.io.database.pickleddb import PickledDB +from orion.core.utils import SingletonAlreadyInstantiatedError, SingletonNotInstantiatedError +from orion.core.utils.tests import OrionState, update_singletons from orion.core.worker.trial import Trial -from orion.storage.base import FailedUpdate, get_storage, MissingArguments +from orion.storage.base import FailedUpdate, get_storage, MissingArguments, setup_storage, Storage +from orion.storage.legacy import Legacy +from orion.storage.track import HAS_TRACK, REASON + +log = logging.getLogger(__name__) +log.setLevel(logging.WARNING) storage_backends = [ - None, # defaults to legacy with PickleDB + None # defaults to legacy with PickleDB ] +if not HAS_TRACK: + log.warning('Track is not tested because: %s!', REASON) +else: + storage_backends.append({ + 'type': 'track', + 'uri': 'file://${file}?objective=loss' + }) + base_experiment = { 'name': 'default_name', 'version': 0, 'metadata': { 'user': 'default_user', 'user_script': 'abc', + 'priors': {'x': 'uniform(0, 10)'}, 'datetime': '2017-11-23T02:00:00' } } @@ -71,7 +88,8 @@ def make_lost_trial(): """Make a lost trial""" obj = copy.deepcopy(base_trial) obj['status'] = 'reserved' - obj['heartbeat'] = datetime.datetime.utcnow() - datetime.timedelta(seconds=61 * 2) + obj['heartbeat'] = (datetime.datetime.utcnow() - + datetime.timedelta(seconds=orion.core.config.worker.heartbeat * 2)) obj['params'].append({ 'name': '/index', 'type': 'categorical', @@ -108,13 +126,107 @@ def generate_experiments(): return [_generate(exp, 'name', value=str(i)) 
            for i, exp in enumerate(exps)]


+@pytest.mark.usefixtures("setup_pickleddb_database")
+def test_setup_storage_default():
+    """Test that the storage is set up using the default config"""
+    update_singletons()
+    setup_storage()
+    storage = Storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+
+
+def test_setup_storage_bad():
+    """Test how setup fails when configuring with non-existent backends"""
+    update_singletons()
+    with pytest.raises(NotImplementedError) as exc:
+        setup_storage({'type': 'idontexist'})
+
+    assert exc.match('idontexist')
+
+
+def test_setup_storage_custom():
+    """Test setup with local configuration"""
+    update_singletons()
+    setup_storage({'type': 'legacy', 'database': {'type': 'pickleddb', 'host': 'test.pkl'}})
+    storage = Storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+    assert storage._db.host == 'test.pkl'
+
+
+def test_setup_storage_custom_type_missing():
+    """Test setup with local configuration with type missing"""
+    update_singletons()
+    setup_storage({'database': {'type': 'pickleddb', 'host': 'test.pkl'}})
+    storage = Storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+    assert storage._db.host == 'test.pkl'
+
+
+@pytest.mark.usefixtures("setup_pickleddb_database")
+def test_setup_storage_custom_legacy_empty():
+    """Test setup with local configuration with legacy but no config"""
+    update_singletons()
+    setup_storage({'type': 'legacy'})
+    storage = Storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+    assert storage._db.host == orion.core.config.storage.database.host
+
+
+def test_setup_storage_bad_override():
+    """Test setup with different type than existing singleton"""
+    update_singletons()
+    setup_storage({'type': 'legacy', 'database': {'type': 'pickleddb', 'host': 'test.pkl'}})
+    storage = Storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+    with pytest.raises(SingletonAlreadyInstantiatedError) as exc:
+        setup_storage({'type': 'track'})
+
+    assert exc.match(r'A singleton instance of \(type: Storage\)')
+
+
+@pytest.mark.xfail(reason='Fix this when introducing #135 in v0.2.0')
+def test_setup_storage_bad_config_override():
+    """Test setup with different config than existing singleton"""
+    update_singletons()
+    setup_storage({'database': {'type': 'pickleddb', 'host': 'test.pkl'}})
+    storage = Storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+    with pytest.raises(SingletonAlreadyInstantiatedError):
+        setup_storage({'database': {'type': 'mongodb'}})
+
+
+def test_get_storage_uninitiated():
+    """Test that get_storage fails if no storage singleton exists"""
+    update_singletons()
+    with pytest.raises(SingletonNotInstantiatedError) as exc:
+        get_storage()
+
+    assert exc.match(r'No singleton instance of \(type: Storage\) was created')
+
+
+def test_get_storage():
+    """Test that get_storage gets the singleton"""
+    update_singletons()
+    setup_storage({'database': {'type': 'pickleddb', 'host': 'test.pkl'}})
+    storage = get_storage()
+    assert isinstance(storage, Legacy)
+    assert isinstance(storage._db, PickledDB)
+    assert get_storage() == storage
+
+
@pytest.mark.parametrize('storage', storage_backends)
class TestStorage:
    """Test all storage backend"""

    def test_create_experiment(self, storage):
        """Test create experiment"""
-        with OrionState(experiments=[], database=storage) as cfg:
+        with OrionState(experiments=[], storage=storage) as cfg:
            storage = cfg.storage()
            storage.create_experiment(base_experiment)
@@ -125,17 +237,13 @@ def test_create_experiment(self, storage):

            experiment = experiments[0]
            assert base_experiment == experiment, 'Local experiment and DB should match'

-    def test_create_experiment_fail(self, storage):
-        """Test create experiment"""
-        with OrionState(experiments=[base_experiment], database=storage) as cfg:
-            storage = cfg.storage()
-
+            # Insert it again
            with pytest.raises(DuplicateKeyError):
                storage.create_experiment(base_experiment)

    def test_fetch_experiments(self, storage, name='0', user='a'):
        """Test fetch experiments"""
-        with OrionState(experiments=generate_experiments(), database=storage) as cfg:
+        with OrionState(experiments=generate_experiments(), storage=storage) as cfg:
            storage = cfg.storage()

            experiments = storage.fetch_experiments({})
@@ -153,10 +261,10 @@ def test_fetch_experiments(self, storage, name='0', user='a'):

    def test_update_experiment(self, monkeypatch, storage, name='0', user='a'):
        """Test fetch experiments"""
-        with OrionState(experiments=generate_experiments(), database=storage) as cfg:
+        with OrionState(experiments=generate_experiments(), storage=storage) as cfg:
            storage = cfg.storage()

-            class _Dummy():
+            class _Dummy:
                pass

            experiment = cfg.experiments[0]
@@ -164,7 +272,12 @@ class _Dummy():
            mocked_experiment._id = experiment['_id']

            storage.update_experiment(mocked_experiment, test=True)
-            assert storage.fetch_experiments({'_id': experiment['_id']})[0]['test']
+            experiments = storage.fetch_experiments({'_id': experiment['_id']})
+            assert len(experiments) == 1
+
+            fetched_experiment = experiments[0]
+            assert fetched_experiment['test']
+            assert 'test' not in storage.fetch_experiments({'_id': cfg.experiments[1]['_id']})[0]

            storage.update_experiment(uid=experiment['_id'], test2=True)
@@ -179,7 +292,7 @@ class _Dummy():

    def test_register_trial(self, storage):
        """Test register trial"""
-        with OrionState(experiments=[base_experiment], database=storage) as cfg:
+        with OrionState(experiments=[base_experiment], storage=storage) as cfg:
            storage = cfg.storage()
            trial1 = storage.register_trial(Trial(**base_trial))
            trial2 = storage.get_trial(trial1)
@@ -189,7 +302,7 @@ def test_register_trial(self, storage):
    def test_register_duplicate_trial(self, storage):
        """Test register trial"""
        with OrionState(
-                experiments=[base_experiment], trials=[base_trial], database=storage) as cfg:
+                experiments=[base_experiment], trials=[base_trial], storage=storage) as cfg:
            storage = cfg.storage()

            with pytest.raises(DuplicateKeyError):
@@ -197,13 +310,13 @@ def test_register_duplicate_trial(self, storage):

    def test_register_lie(self, storage):
        """Test register lie"""
-        with OrionState(experiments=[base_experiment], database=storage) as cfg:
+        with OrionState(experiments=[base_experiment], storage=storage) as cfg:
            storage = cfg.storage()
            storage.register_lie(Trial(**base_trial))

    def test_register_lie_fail(self, storage):
        """Test register lie"""
-        with OrionState(experiments=[base_experiment], lies=[base_trial], database=storage) as cfg:
+        with OrionState(experiments=[base_experiment], lies=[base_trial], storage=storage) as cfg:
            storage = cfg.storage()

            with pytest.raises(DuplicateKeyError):
@@ -212,11 +325,12 @@ def test_register_lie_fail(self, storage):
    def test_reserve_trial_success(self, storage):
        """Test reserve trial"""
        with OrionState(
-                experiments=[base_experiment], trials=[base_trial], database=storage) as cfg:
+                experiments=[base_experiment], trials=[base_trial], storage=storage) as cfg:
            storage = cfg.storage()
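+            # Fetch the experiment registered by OrionState for the base trial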
-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trial = storage.reserve_trial(experiment)
+            assert trial is not None
            assert trial.status == 'reserved'

@@ -225,10 +339,10 @@ def test_reserve_trial_fail(self, storage):
        with OrionState(
                experiments=[base_experiment],
                trials=generate_trials(status=['completed', 'reserved']),
-                database=storage) as cfg:
+                storage=storage) as cfg:
            storage = cfg.storage()

-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trial = storage.reserve_trial(experiment)
            assert trial is None

@@ -236,9 +350,9 @@ def test_fetch_trials(self, storage):
        """Test fetch experiment trials"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            storage = cfg.storage()
-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)

            trials1 = storage.fetch_trials(experiment=experiment)
            trials2 = storage.fetch_trials(uid=experiment._id)
@@ -255,10 +369,11 @@ def test_get_trial(self, storage):
        """Test get trial"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            storage = cfg.storage()

            trial_dict = cfg.trials[0]
+
            trial1 = storage.get_trial(trial=Trial(**trial_dict))
            trial2 = storage.get_trial(uid=trial1.id)

@@ -274,86 +389,40 @@ def test_fetch_lost_trials(self, storage):
        """Test update heartbeat"""
        with OrionState(experiments=[base_experiment],
-                        trials=generate_trials() + [make_lost_trial()], database=storage) as cfg:
+                        trials=generate_trials() + [make_lost_trial()], storage=storage) as cfg:
            storage = cfg.storage()

-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trials = storage.fetch_lost_trials(experiment)

-            assert len(trials) == 1
-
-    def retrieve_result(self, storage, generated_result):
-        """Test retrieve result"""
-        results_file = tempfile.NamedTemporaryFile(
-            mode='w', prefix='results_', suffix='.log', dir='.', delete=True
-        )
-
-        # Generate fake result
-        with open(results_file.name, 'w') as file:
-            json.dump([generated_result], file)
-        # --
-        with OrionState(experiments=[], trials=[], database=storage) as cfg:
-            storage = cfg.storage()
-
-            trial = Trial(**base_trial)
-            trial = storage.retrieve_result(trial, results_file)
-
-            results = trial.results
-
-            assert len(results) == 1
-            assert results[0].to_dict() == generated_result
-
-    def test_retrieve_result(self, storage):
-        """Test retrieve result"""
-        self.retrieve_result(storage, generated_result={
-            'name': 'loss',
-            'type': 'objective',
-            'value': 2})
-
-    def test_retrieve_result_incorrect_value(self, storage):
-        """Test retrieve result"""
-        with pytest.raises(ValueError) as exec:
-            self.retrieve_result(storage, generated_result={
-                'name': 'loss',
-                'type': 'objective_unsupported_type',
-                'value': 2})
-
-        assert exec.match(r'Given type, objective_unsupported_type')
-
-    def test_retrieve_result_nofile(self, storage):
-        """Test retrieve result"""
-        results_file = tempfile.NamedTemporaryFile(
-            mode='w', prefix='results_', suffix='.log', dir='.', delete=True
-        )
-
-        with OrionState(experiments=[], trials=[], database=storage) as cfg:
-            storage = cfg.storage()
-
-            trial = Trial(**base_trial)
-
-            with pytest.raises(json.decoder.JSONDecodeError) as exec:
-                storage.retrieve_result(trial, results_file)
-
-        assert exec.match(r'Expecting value: line 1 column 1 \(char 0\)')
-
-    def test_push_trial_results(self, storage):
-        """Successfully push a completed trial into database."""
-        with OrionState(experiments=[], trials=[base_trial], database=storage) as cfg:
-            storage = cfg.storage()
-            trial = storage.get_trial(Trial(**base_trial))
-            results = [
-                Trial.Result(name='loss', type='objective', value=2)
-            ]
-            trial.results = results
-            assert storage.push_trial_results(trial), 'should update successfully'
+            count = 0
+            now_datetime = datetime.datetime.utcnow()
+            now_seconds = (now_datetime - datetime.datetime(1970, 1, 1)).total_seconds()
+            for t in cfg.trials:
+                status = t.get('status')
+                if status == 'reserved':
+                    heartbeat = t.get('heartbeat')
+                    if heartbeat is None:
+                        continue
+
+                    diff = 0
+                    if isinstance(heartbeat, datetime.datetime):
+                        diff = (now_datetime - heartbeat).total_seconds()
+                    else:
+                        diff = now_seconds - heartbeat
+
+                    if diff > 60 * 2:
+                        count += 1

-            trial2 = storage.get_trial(trial)
-            assert trial2.results == results
+            assert len(trials) == count

-    def test_change_status_success(self, storage, exp_config_file):
+    def test_change_status_success(self, storage):
        """Change the status of a Trial"""
        def check_status_change(new_status):
-            with OrionState(from_yaml=exp_config_file, database=storage) as cfg:
-                trial = cfg.get_trial(0)
+            with OrionState(
+                    experiments=[base_experiment],
+                    trials=generate_trials(), storage=storage) as cfg:
+                trial = get_storage().get_trial(cfg.get_trial(0))
                assert trial is not None, 'was not able to retrieve trial for test'

                get_storage().set_trial_status(trial, status=new_status)
@@ -371,14 +440,32 @@ def check_status_change(new_status):
        check_status_change('suspended')
        check_status_change('new')

-    def test_change_status_failed_update(self, storage, exp_config_file):
-        """Successfully find new trials in db and reserve one at 'random'."""
+    def test_change_status_invalid(self, storage):
+        """Attempt to change the status of a Trial to an invalid one"""
+        with OrionState(
+                experiments=[base_experiment],
+                trials=generate_trials(), storage=storage) as cfg:
+            trial = get_storage().get_trial(cfg.get_trial(0))
+            assert trial is not None, 'Was not able to retrieve trial for test'
+
+            with pytest.raises(ValueError) as exc:
+                get_storage().set_trial_status(trial, status='moo')
+
+            assert exc.match('Given status `moo` not one of')
+
+    def test_change_status_failed_update(self, storage):
+        """Check that a status change fails when the trial's local status is out of sync"""
        def check_status_change(new_status):
-            with OrionState(from_yaml=exp_config_file, database=storage) as cfg:
-                trial = cfg.get_trial(1)
+            with OrionState(
+                    experiments=[base_experiment],
+                    trials=generate_trials(), storage=storage) as cfg:
+                trial = get_storage().get_trial(cfg.get_trial(0))
                assert trial is not None, 'Was not able to retrieve trial for test'
                assert trial.status != new_status
+                if trial.status == new_status:
+                    return
+
                with pytest.raises(FailedUpdate):
                    trial.status = new_status
                    get_storage().set_trial_status(trial, status=new_status)
@@ -392,10 +479,10 @@ def check_status_change(new_status):

    def test_fetch_pending_trials(self, storage):
        """Test fetch pending trials"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            storage = cfg.storage()

-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trials = storage.fetch_pending_trials(experiment)

            count = 0
@@ -410,10 +497,10 @@ def test_fetch_noncompleted_trials(self, storage):
        """Test fetch non completed trials"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            storage = cfg.storage()

-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trials = storage.fetch_noncompleted_trials(experiment)

            count = 0
@@ -421,22 +508,23 @@
            if trial['status'] != 'completed':
                count += 1

-            assert len(trials) == count
            for trial in trials:
                assert trial.status != 'completed'

-    def test_fetch_trial_by_status(self, storage):
+            assert len(trials) == count
+
+    def test_fetch_trials_by_status(self, storage):
        """Test fetch completed trials"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            count = 0
            for trial in cfg.trials:
                if trial['status'] == 'completed':
                    count += 1

            storage = cfg.storage()
-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
-            trials = storage.fetch_trial_by_status(experiment, 'completed')
+            experiment = cfg.get_experiment('default_name', version=None)
+            trials = storage.fetch_trials_by_status(experiment, 'completed')

            assert len(trials) == count
            for trial in trials:
@@ -445,7 +533,7 @@ def test_count_completed_trials(self, storage):
        """Test count completed trials"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            count = 0
            for trial in cfg.trials:
                if trial['status'] == 'completed':
@@ -453,14 +541,14 @@

            storage = cfg.storage()

-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trials = storage.count_completed_trials(experiment)
            assert trials == count

    def test_count_broken_trials(self, storage):
        """Test count broken trials"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            count = 0
            for trial in cfg.trials:
                if trial['status'] == 'broken':
@@ -468,7 +556,7 @@

            storage = cfg.storage()

-            experiment = cfg.get_experiment('default_name', 'default_user', version=None)
+            experiment = cfg.get_experiment('default_name', version=None)
            trials = storage.count_broken_trials(experiment)
            assert trials == count

@@ -476,24 +564,29 @@
    def test_update_heartbeat(self, storage):
        """Test update heartbeat"""
        with OrionState(
-                experiments=[base_experiment], trials=generate_trials(), database=storage) as cfg:
+                experiments=[base_experiment], trials=generate_trials(), storage=storage) as cfg:
            storage_name = storage
            storage = cfg.storage()
-            exp = cfg.get_experiment(name='default_name')
-            trial1 = storage.fetch_trial_by_status(exp, status='reserved')[0]
+            exp = cfg.get_experiment('default_name')
+            trial1 = storage.fetch_trials_by_status(exp, status='reserved')[0]
+            trial1b = copy.deepcopy(trial1)

            storage.update_heartbeat(trial1)

            trial2 = storage.get_trial(trial1)

+            # this checks that the heartbeat is the correct type and was updated prior to now
+            assert trial1b.heartbeat is None
            assert trial1.heartbeat is None
            assert trial2.heartbeat is not None
-            # this checks that heartbeat is the correct type and that it was updated prior to now
+
+            # Sleep a bit, because fast CPUs make this test fail
+            time.sleep(0.1)
            assert trial2.heartbeat < datetime.datetime.utcnow()

            if storage_name is None:
-                trial3 = storage.fetch_trial_by_status(exp, status='completed')[0]
+                trial3 = storage.fetch_trials_by_status(exp, status='completed')[0]
                storage.update_heartbeat(trial3)

                assert trial3.heartbeat is None, \
diff --git a/tox.ini b/tox.ini
index bd051be12..220c4868e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,30 +1,10 @@
[tox]
-envlist = py35,py36,py37,flake8,pylint,doc8,packaging,docs
+envlist = py36,py37,py38,flake8,pylint,doc8,packaging,docs
minversion = 2.7.0

-## Configure test + coverage process
-
-[pytest]
-addopts = -ra -q --color=yes
-norecursedirs = .* *.egg* config docs dist build
-xfail_strict = True
-rsyncdirs = src tests
-looponfailroots = src tests
-
-[coverage:run]
-branch = True
-source =
-    src
-    tests
-omit = **/_[a-zA-Z0-9]*.py
-
-[coverage:report]
-# Regexes for lines to exclude from consideration
-exclude_lines =
-    # Don't complain if tests don't hit defensive assertion code:
-    pass
-    raise AssertionError
-    raise NotImplementedError
+#########################################################
+# Tox environments
+#########################################################

[testenv]
description = Run tests with coverage with pytest under current Python env
setenv = COVERAGE_FILE=.coverage.{envname}
passenv = CI TRAVIS TRAVIS_*
deps =
    -rtests/requirements.txt
+    -rexamples/scikitlearn-iris/requirements.txt
    coverage
commands =
    pip install -U {toxinidir}/tests/functional/gradient_descent_algo
-    coverage run --parallel-mode -m pytest -vv --ignore tests/functional/backward_compatibility --timeout=180
+    coverage run --parallel-mode -m pytest -vv --ignore tests/functional/backward_compatibility --ignore tests/stress --timeout=180 {posargs}
    coverage combine
    coverage report -m

@@ -51,6 +32,13 @@ commands =
    coverage combine
    coverage report -m

+[testenv:stress]
+description = Run the stress tests of Oríon
+deps =
+    -rtests/stress/requirements.txt
+commands =
+    python tests/stress/client/stress_experiment.py
+
[testenv:demo-random]
description = Run a demo with random search algorithm
setenv = COVERAGE_FILE=.coverage.random
@@ -63,6 +51,7 @@ commands =
    coverage combine
    coverage report -m

+# Coverage environments
[testenv:final-coverage]
description = Combine coverage data across environments (run after tests)
skip_install = True
@@ -83,96 +72,56 @@ passenv = {[testenv]passenv}
deps = codecov
commands = codecov --required

-## Setup development process
-
-[testenv:devel]
-description = Incremental devel env command, defaults to running tests
+# Linting & Verification environments
+[testenv:lint]
+description = Lint code and docs against the project's standards
+basepython = python3
+skip_install = false
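+# Reuse the deps and commands of the individual linter environments defined below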
deps =
-    -rtests/requirements.txt
-usedevelop = True
+    {[testenv:flake8]deps}
+    {[testenv:pylint]deps}
+    {[testenv:doc8]deps}
+    {[testenv:packaging]deps}
commands =
-    pip install -U {toxinidir}/tests/functional/gradient_descent_algo
-    python setup.py test --addopts '-vvv --exitfirst --looponfail {posargs}'
+    {[testenv:flake8]commands}
+    {[testenv:pylint]commands}
+    {[testenv:doc8]commands}
+    {[testenv:packaging]commands}

-## Configure linters
-
-[flake8]
-count = True
-show-source = True
-doctests = True
-# select = E, F, W, C90, I, D, B, B902
-ignore =
-    # Missing docstring in __init__
-    D107
-    # blank-line after doc summaries (annoying for modules' doc)
-    D205
-    # conflicts with D211: No blank lines allowed before class docstring
-    D203
-    # do not enforce first-line-period at module docs
-    D400
-    # conflicts with E133: closing bracket is missing indentation
-    E123
-exclude =
-    .tox,
-    .git,
-    __pycache__,
-    docs,
-    config,
-    build,
-    dist,
-    *.pyc,
-    *.egg-info,
-    .cache,
-    .eggs,
-    src/orion/core/_version.py,
-    src/orion/core/utils/_appdirs.py
-max-line-length = 100
-# McCabe complexity checker
-max-complexity = 20
-# flake8-import-order: style
-import-order-style = google
-# flake8-import-order: local module name checker
-application-import-names = orion, versioneer
-
-[testenv:flake8]
+[testenv:flake8] # Will use the configuration file `.flake8` automatically
description = Use flake8 linter to impose standards on the project
-basepython = python3.6
+basepython = python3
skip_install = true
deps =
-    pydocstyle == 3.0.0
-    flake8 == 3.5.0
-    flake8-import-order == 0.15
-    flake8-docstrings == 1.1.0
-    flake8-bugbear == 17.4.0
+    flake8 == 3.8.*
+    flake8-import-order == 0.18.*
+    flake8-docstrings == 1.5.*
+    flake8-bugbear == 20.1.*
commands =
    flake8 docs/ src/orion/ tests/ setup.py

-[testenv:pylint]
+[testenv:pylint] # Will use the configuration file `.pylintrc` automatically
description = Perform static analysis and output code metrics
-basepython = python3.6
+basepython = python3
skip_install = false
deps =
-    pylint == 1.8.1
+    pylint == 2.5.*
commands =
    pylint src/orion/core src/orion/client src/orion/algo

-[doc8]
-max-line-length = 100
-file-encoding = utf-8
-
[testenv:doc8]
description = Impose standards on *.rst documentation files
-basepython = python3.6
+basepython = python3
skip_install = true
deps =
    -rdocs/requirements.txt
-    doc8 == 0.8.0
+    doc8 == 0.8.*
commands =
    doc8 docs/src/

[testenv:packaging]
description = Check whether README.rst is reST and missing from MANIFEST.in
-basepython = python3.6
+basepython = python3
deps =
    check-manifest
    readme_renderer
@@ -180,45 +129,39 @@ commands =
    check-manifest
    python setup.py check -r -s

-[testenv:lint]
-description = Lint code and docs against some standard standards
-basepython = python3.6
-skip_install = false
+# Development environments
+[testenv:devel]
+description = Incremental devel env command, defaults to running tests
deps =
-    {[testenv:flake8]deps}
-    {[testenv:pylint]deps}
-    {[testenv:doc8]deps}
-    {[testenv:packaging]deps}
+    -rtests/requirements.txt
+    -rexamples/scikitlearn-iris/requirements.txt
+usedevelop = True
commands =
-    {[testenv:flake8]commands}
-    {[testenv:pylint]commands}
-    {[testenv:doc8]commands}
-    {[testenv:packaging]commands}
-
-## Documentation macros
+    pip install -U {toxinidir}/tests/functional/gradient_descent_algo
+    python setup.py test --addopts '-vvv --exitfirst --looponfail {posargs}'

+# Documentation environments
[testenv:docs]
description = Invoke sphinx to build documentation and API reference
-basepython = python3.6
+basepython = python3
deps =
    -rdocs/requirements.txt
commands =
    sphinx-build -W --color -c docs/src/ -b html docs/src/ docs/build/html
-    sphinx-build -W --color -c docs/src/ -b man docs/src/ docs/build/man
+#    sphinx-build -W --color -c docs/src/ -b man docs/src/ docs/build/man

[testenv:serve-docs]
-description = Host project's documentation and API reference in localhost
+description = Serve the project's documentation and API reference on localhost
-basepython = python3.6
+basepython = python3
skip_install = true
changedir = docs/build/html
deps =
commands =
    python -m http.server 8000 --bind 127.0.0.1

-## Release tooling (to be removed in favor of CI with CD)
-
+# Release environments (to be removed in favor of CI with CD)
[testenv:build]
-basepython = python3.6
+basepython = python3
skip_install = true
deps =
    wheel
@@ -227,7 +170,7 @@ commands =
    python setup.py -q sdist bdist_wheel

[testenv:release]
-basepython = python3.6
+basepython = python3
skip_install = true
deps =
    {[testenv:build]deps}
commands =
    {[testenv:build]commands}
    twine upload --skip-existing dist/*
+
+#########################################################
+# Packages & Tools configuration
+#########################################################
+
+# Pytest configuration
+[pytest]
+addopts = -ra -q --color=yes
+norecursedirs = .* *.egg* config docs dist build
+xfail_strict = True
+rsyncdirs = src tests
+looponfailroots = src tests examples
+
+# Coverage configuration
+[coverage:run]
+branch = True
+source =
+    src
+    tests
+omit = **/_[a-zA-Z0-9]*.py
+
+[coverage:report]
+exclude_lines =
+    # Don't complain if tests don't hit defensive assertion code:
+    pass
+    raise AssertionError
+    raise NotImplementedError
+
+# Doc8 configuration
+[doc8]
+max-line-length = 100
+file-encoding = utf-8