diff --git a/.gitignore b/.gitignore
index 6be4482a6d..323fee4a40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,8 @@ tags

# Sphinx
_build
+docs/_build
+docs/build
docs/jupyter_execute
docs/.jupyter_cache
docs/**/generated/*
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 67dd44b022..5f9622efa4 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -12,6 +12,6 @@ conda:
environment: "conda-envs/environment-docs.yml"

build:
- os: "ubuntu-20.04"
+ os: "ubuntu-22.04"
tools:
python: "mambaforge-4.10"
diff --git a/Makefile b/Makefile
index 7385ec7957..c92c5c1d4d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,53 +2,25 @@
#
# You can set these variables from the command line.
-SPHINXOPTS =
SPHINXBUILD = sphinx-build
-PAPER =
SOURCEDIR = docs/source
-BUILDDIR = docs/_build
+BUILDDIR = docs/build
+
+rtd: export READTHEDOCS=true

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
+.PHONY: help clean html rtd view

help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " applehelp to make an Apple Help Book"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " xml to make Docutils-native XML files"
- @echo " pseudoxml to make pseudoxml-XML files for display purposes"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
- @echo " coverage to run coverage check of the documentation (if enabled)"
- @echo " serve to launch a server from built html files"
+ @echo " rtd to build the website without any cache"
+ @echo " clean to clean cache and intermediate files"
+ @echo " view to open the built html files"

clean:
rm -rf $(BUILDDIR)/*
@@ -62,140 +34,10 @@ html:
@echo
@echo "Build finished. 
The HTML pages are in $(BUILDDIR)." -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pymc.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pymc.qhc" - -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/pymc" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pymc" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text +rtd: clean + $(SPHINXBUILD) $(SOURCEDIR) $(BUILDDIR) -b html -E @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." 
- make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + @echo "Build finished. The HTML pages are in $(BUILDDIR)." -serve: html - python -m http.server --directory $(BUILDDIR) +view: + python -m webbrowser $(BUILDDIR)/index.html diff --git a/docs/source/PyMC_and_PyTensor.rst b/docs/source/PyMC_and_PyTensor.rst deleted file mode 100644 index a6c06cc22d..0000000000 --- a/docs/source/PyMC_and_PyTensor.rst +++ /dev/null @@ -1,221 +0,0 @@ -:orphan: - -.. - _href from docs/source/index.rst - -================= -PyMC and PyTensor -================= - -What is PyTensor -================ - -PyTensor is a package that allows us to define functions involving array -operations and linear algebra. When we define a PyMC model, we implicitly -build up an PyTensor function from the space of our parameters to -their posterior probability density up to a constant factor. We then use -symbolic manipulations of this function to also get access to its gradient. - -For a thorough introduction to PyTensor see the -:doc:`pytensor docs `, -but for the most part you don't need detailed knowledge about it as long -as you are not trying to define new distributions or other extensions -of PyMC. But let's look at a simple example to get a rough -idea about how it works. Say, we'd like to define the (completely -arbitrarily chosen) function - -.. math:: - - f\colon \mathbb{R} \times \mathbb{R}^n \times \mathbb{N}^n \to \mathbb{R}\\ - (a, x, y) \mapsto \sum_{i=0}^{n} \exp(ax_i^3 + y_i^2). - - -First, we need to define symbolic variables for our inputs (this -is similar to eg SymPy's `Symbol`):: - - import pytensor - import pytensor.tensor as pt - # We don't specify the dtype of our input variables, so it - # defaults to using float64 without any special config. - a = pt.scalar('a') - x = pt.vector('x') - # `pt.ivector` creates a symbolic vector of integers. - y = pt.ivector('y') - -Next, we use those variables to build up a symbolic representation -of the output of our function. Note that no computation is actually -being done at this point. We only record what operations we need to -do to compute the output:: - - inner = a * x**3 + y**2 - out = pt.exp(inner).sum() - -.. note:: - - In this example we use `pt.exp` to create a symbolic representation - of the exponential of `inner`. 
Somewhat surprisingly, it - would also have worked if we used `np.exp`. This is because numpy - gives objects it operates on a chance to define the results of - operations themselves. PyTensor variables do this for a large number - of operations. We usually still prefer the PyTensor - functions instead of the numpy versions, as that makes it clear that - we are working with symbolic input instead of plain arrays. - -Now we can tell PyTensor to build a function that does this computation. -With a typical configuration, PyTensor generates C code, compiles it, -and creates a python function which wraps the C function:: - - func = pytensor.function([a, x, y], [out]) - -We can call this function with actual arrays as many times as we want:: - - a_val = 1.2 - x_vals = np.random.randn(10) - y_vals = np.random.randn(10) - - out = func(a_val, x_vals, y_vals) - -For the most part the symbolic PyTensor variables can be operated on -like NumPy arrays. Most NumPy functions are available in `pytensor.tensor` -(which is typically imported as `pt`). A lot of linear algebra operations -can be found in `pt.nlinalg` and `pt.slinalg` (the NumPy and SciPy -operations respectively). Some support for sparse matrices is available -in `pytensor.sparse`. For a detailed overview of available operations, -see :mod:`the pytensor api docs `. - -A notable exception where PyTensor variables do *not* behave like -NumPy arrays are operations involving conditional execution. - -Code like this won't work as expected:: - - a = pt.vector('a') - if (a > 0).all(): - b = pt.sqrt(a) - else: - b = -a - -`(a > 0).all()` isn't actually a boolean as it would be in NumPy, but -still a symbolic variable. Python will convert this object to a boolean -and according to the rules for this conversion, things that aren't empty -containers or zero are converted to `True`. So the code is equivalent -to this:: - - a = pt.vector('a') - b = pt.sqrt(a) - -To get the desired behaviour, we can use `pt.switch`:: - - a = pt.vector('a') - b = pt.switch((a > 0).all(), pt.sqrt(a), -a) - -Indexing also works similarly to NumPy:: - - a = pt.vector('a') - # Access the 10th element. This will fail when a function build - # from this expression is executed with an array that is too short. - b = a[10] - - # Extract a subvector - b = a[[1, 2, 10]] - -Changing elements of an array is possible using `pt.set_subtensor`:: - - a = pt.vector('a') - b = pt.set_subtensor(a[:10], 1) - - # is roughly equivalent to this (although pytensor avoids - # the copy if `a` isn't used anymore) - a = np.random.randn(10) - b = a.copy() - b[:10] = 1 - -How PyMC uses PyTensor -==================== - -Now that we have a basic understanding of PyTensor we can look at what -happens if we define a PyMC model. Let's look at a simple example:: - - true_mu = 0.1 - data = true_mu + np.random.randn(50) - - with pm.Model() as model: - mu = pm.Normal('mu', mu=0, sigma=1) - y = pm.Normal('y', mu=mu, sigma=1, observed=data) - -In this model we define two variables: `mu` and `y`. The first is -a free variable that we want to infer, the second is an observed -variable. To sample from the posterior we need to build the function - -.. math:: - - \log P(μ|y) + C = \log P(y|μ) + \log P(μ) =: \text{logp}(μ)\\ - -where with the normal likelihood :math:`N(x|μ,σ^2)` - -.. math:: - - \text{logp}\colon \mathbb{R} \to \mathbb{R}\\ - μ \mapsto \log N(μ|0, 1) + \log N(y|μ, 1), - -To build that function we need to keep track of two things: The parameter -space (the *free variables*) and the logp function. 
For each free variable -we generate an PyTensor variable. And for each variable (observed or otherwise) -we add a term to the global logp. In the background something similar to -this is happening:: - - # For illustration only, those functions don't actually exist - # in exactly this way! - model = pm.Model() - - mu = pt.scalar('mu') - model.add_free_variable(mu) - model.add_logp_term(pm.Normal.dist(0, 1).logp(mu)) - - model.add_logp_term(pm.Normal.dist(mu, 1).logp(data)) - -So calling `pm.Normal()` modifies the model: It changes the logp function -of the model. If the `observed` keyword isn't set it also creates a new -free variable. In contrast, `pm.Normal.dist()` doesn't care about the model, -it just creates an object that represents the normal distribution. Calling -`logp` on this object creates an PyTensor variable for the logp probability -or log probability density of the distribution, but again without changing -the model in any way. - -Continuous variables with support only on a subset of the real numbers -are treated a bit differently. We create a transformed variable -that has support on the reals and then modify this variable. For -example:: - - with pm.Model() as model: - mu = pm.Normal('mu', 0, 1) - sigma = pm.HalfNormal('sigma', 1) - y = pm.Normal('y', mu=mu, sigma=sigma, observed=data) - -is roughly equivalent to this:: - - # For illustration only, not real code! - model = pm.Model() - mu = pt.scalar('mu') - model.add_free_variable(mu) - model.add_logp_term(pm.Normal.dist(0, 1).logp(mu)) - - sd_log__ = pt.scalar('sd_log__') - model.add_free_variable(sd_log__) - model.add_logp_term(corrected_logp_half_normal(sd_log__)) - - sigma = pt.exp(sd_log__) - model.add_deterministic_variable(sigma) - - model.add_logp_term(pm.Normal.dist(mu, sigma).logp(data)) - -The return values of the variable constructors are subclasses -of PyTensor variables, so when we define a variable we can use any -PyTensor operation on them:: - - design_matrix = np.array([[...]]) - with pm.Model() as model: - # beta is a pt.dvector - beta = pm.Normal('beta', 0, 1, shape=len(design_matrix)) - predict = pt.dot(design_matrix, beta) - sigma = pm.HalfCauchy('sigma', beta=2.5) - pm.Normal('y', mu=predict, sigma=sigma, observed=data) diff --git a/docs/source/_templates/distribution.rst b/docs/source/_templates/distribution.rst new file mode 100644 index 0000000000..34ec9732e2 --- /dev/null +++ b/docs/source/_templates/distribution.rst @@ -0,0 +1,16 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +{% if objtype == "class" %} +.. autoclass:: {{ objname }} + + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + :toctree: classmethods + + {{ objname }}.dist +{% else %} +.. autofunction:: {{ objname }} +{% endif %} diff --git a/docs/source/api.rst b/docs/source/api.rst index 2554f2e4d6..4aa717a2dc 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -42,10 +42,10 @@ library, a general purpose library for "exploratory analysis of Bayesian models". * Functions from the `arviz.plots` module are available through ``pymc.`` or ``pymc.plots.``, -but for their API documentation please refer to the :ref:`ArviZ documentation `. + but for their API documentation please refer to the :ref:`ArviZ documentation `. * Functions from the `arviz.stats` module are available through ``pymc.`` or ``pymc.stats.``, -but for their API documentation please refer to the :ref:`ArviZ documentation `. + but for their API documentation please refer to the :ref:`ArviZ documentation `. 
ArviZ is a dependency of PyMC and so, in addition to the locations described above, importing ArviZ and using ``arviz.`` will also work without any extra installation. diff --git a/docs/source/api/distributions.rst b/docs/source/api/distributions.rst index fd738e9e73..ca3a09cbaa 100644 --- a/docs/source/api/distributions.rst +++ b/docs/source/api/distributions.rst @@ -1,9 +1,11 @@ .. _api_distributions: + ************* Distributions ************* .. toctree:: + :maxdepth: 2 distributions/continuous distributions/discrete @@ -14,5 +16,4 @@ Distributions distributions/censored distributions/simulator distributions/transforms - distributions/logprob distributions/utilities diff --git a/docs/source/api/distributions/censored.rst b/docs/source/api/distributions/censored.rst index 73f1c9363c..59eb8e4fb5 100644 --- a/docs/source/api/distributions/censored.rst +++ b/docs/source/api/distributions/censored.rst @@ -2,8 +2,19 @@ Censored ******** +.. + Manually follow the template in _templates/distribution.rst. + If at any point, multiple objects are listed here, + the pattern should instead be modified to that of the + other API files such as api/distributions/continuous.rst + .. currentmodule:: pymc -.. autosummary:: - :toctree: generated - Censored +.. autoclass:: Censored + + .. rubric:: Methods + + .. autosummary:: + :toctree: classmethods + + Censored.dist diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst index 7ebcb0f206..98f1c97ca1 100644 --- a/docs/source/api/distributions/continuous.rst +++ b/docs/source/api/distributions/continuous.rst @@ -5,6 +5,7 @@ Continuous .. currentmodule:: pymc .. autosummary:: :toctree: generated/ + :template: distribution.rst AsymmetricLaplace Beta diff --git a/docs/source/api/distributions/discrete.rst b/docs/source/api/distributions/discrete.rst index c2c1b7120a..dd9971b174 100644 --- a/docs/source/api/distributions/discrete.rst +++ b/docs/source/api/distributions/discrete.rst @@ -5,6 +5,7 @@ Discrete .. currentmodule:: pymc .. autosummary:: :toctree: generated + :template: distribution.rst Bernoulli BetaBinomial diff --git a/docs/source/api/distributions/mixture.rst b/docs/source/api/distributions/mixture.rst index 5adbd7ef9a..5f921efbe6 100644 --- a/docs/source/api/distributions/mixture.rst +++ b/docs/source/api/distributions/mixture.rst @@ -5,6 +5,7 @@ Mixture .. currentmodule:: pymc .. autosummary:: :toctree: generated + :template: distribution.rst Mixture NormalMixture diff --git a/docs/source/api/distributions/multivariate.rst b/docs/source/api/distributions/multivariate.rst index 02d909cdc9..dbb2ba36ba 100644 --- a/docs/source/api/distributions/multivariate.rst +++ b/docs/source/api/distributions/multivariate.rst @@ -5,6 +5,7 @@ Multivariate .. currentmodule:: pymc .. autosummary:: :toctree: generated + :template: distribution.rst CAR Dirichlet diff --git a/docs/source/api/distributions/simulator.rst b/docs/source/api/distributions/simulator.rst index 509916a875..a85bc58857 100644 --- a/docs/source/api/distributions/simulator.rst +++ b/docs/source/api/distributions/simulator.rst @@ -1,9 +1,20 @@ -********** +********* Simulator -********** +********* + +.. + Manually follow the template in _templates/distribution.rst. + If at any point, multiple objects are listed here, + the pattern should instead be modified to that of the + other API files such as api/distributions/continuous.rst .. currentmodule:: pymc -.. autosummary:: - :toctree: generated - Simulator +.. autoclass:: Simulator + + .. 
rubric:: Methods + + .. autosummary:: + :toctree: classmethods + + Simulator.dist diff --git a/docs/source/api/distributions/timeseries.rst b/docs/source/api/distributions/timeseries.rst index d0cbf6617c..3791c653d0 100644 --- a/docs/source/api/distributions/timeseries.rst +++ b/docs/source/api/distributions/timeseries.rst @@ -5,6 +5,7 @@ Timeseries .. currentmodule:: pymc .. autosummary:: :toctree: generated + :template: distribution.rst AR EulerMaruyama diff --git a/docs/source/api/distributions/truncated.rst b/docs/source/api/distributions/truncated.rst index 97378d4cd4..184c3329ec 100644 --- a/docs/source/api/distributions/truncated.rst +++ b/docs/source/api/distributions/truncated.rst @@ -2,8 +2,19 @@ Truncated ********* +.. + Manually follow the template in _templates/distribution.rst. + If at any point, multiple objects are listed here, + the pattern should instead be modified to that of the + other API files such as api/distributions/continuous.rst + .. currentmodule:: pymc -.. autosummary:: - :toctree: generated - Truncated +.. autoclass:: Truncated + + .. rubric:: Methods + + .. autosummary:: + :toctree: classmethods + + Truncated.dist diff --git a/docs/source/api/gp.rst b/docs/source/api/gp.rst index 6b2646fbfe..d7770d3d52 100644 --- a/docs/source/api/gp.rst +++ b/docs/source/api/gp.rst @@ -4,6 +4,7 @@ Gaussian Processes .. automodule:: pymc.gp .. toctree:: + :maxdepth: 2 gp/implementations gp/mean diff --git a/docs/source/api/pytensorf.rst b/docs/source/api/pytensorf.rst index ac7760af48..af7b70f041 100644 --- a/docs/source/api/pytensorf.rst +++ b/docs/source/api/pytensorf.rst @@ -1,5 +1,5 @@ PyTensor utils -************ +************** .. currentmodule:: pymc diff --git a/docs/source/conf.py b/docs/source/conf.py index 5518154ab0..7dd23f813a 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,7 +39,6 @@ "numpydoc", "IPython.sphinxext.ipython_console_highlighting", "IPython.sphinxext.ipython_directive", - "sphinx.ext.autosectionlabel", "myst_nb", "sphinx_design", "notfound.extension", @@ -108,7 +107,8 @@ # built documents. version = pymc.__version__ -if os.environ.get("READTHEDOCS", False): +on_readthedocs = os.environ.get("READTHEDOCS", False) +if on_readthedocs: rtd_version = os.environ.get("READTHEDOCS_VERSION", "") if rtd_version.lower() == "stable": version = pymc.__version__.split("+")[0] @@ -147,7 +147,7 @@ ] # myst config -nb_execution_mode = "force" +nb_execution_mode = "force" if on_readthedocs else "off" nb_execution_allow_errors = False nb_execution_raise_on_error = True nb_execution_timeout = 300 @@ -306,6 +306,7 @@ "myst": ("https://myst-parser.readthedocs.io/en/latest", None), "myst-nb": ("https://myst-nb.readthedocs.io/en/latest/", None), "python": ("https://docs.python.org/3/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), "xarray": ("https://docs.xarray.dev/en/stable/", None), } diff --git a/docs/source/contributing/build_docs.md b/docs/source/contributing/build_docs.md index d0d4044ec8..5f4954e5f6 100644 --- a/docs/source/contributing/build_docs.md +++ b/docs/source/contributing/build_docs.md @@ -7,23 +7,58 @@ To build docs on Windows we recommend running inside a Docker container. 
To build the docs, run these commands at PyMC repository root:
-```bash
-pip install -r requirements-dev.txt # Make sure the dev requirements are installed
-pip install numpyro # Make sure `sampling/jax` docs can be built
+## Installing dependencies
+
+```shell
+conda env create -f conda-envs/environment-docs.yml # or make sure all dependencies listed here are installed
pip install -e . # Install local pymc version as installable package
-make clean # clean built docs from previous runs and intermediate outputs
-make html # Build docs
-python -m http.server --directory docs/_build/ # Render docs
```
-Check the printed URL where docs are being served and open it.
+## Building the documentation
+There is a `Makefile` in the pymc repo to help with the doc building process.
+
+```shell
+make clean
+make html
+```
+
+`make html` is the command that builds the documentation with `sphinx-build`.
+`make clean` deletes caches and intermediate files.
The `make clean` step is not always necessary; if you are working on a
specific page, for example, you can rebuild the docs without the clean step
and everything should work fine.
If you are restructuring the content or editing toctrees, then you'll need
to execute `make clean`.
-A good approach is to skip the `make clean`, which makes
-the `make html` blazing fast and see how everything looks.
+A good approach is generally to skip `make clean`, which makes
+`make html` much faster, and check how everything looks.
If something looks strange, run `make clean` and `make html` one after the
other to see if it fixes the issue before checking anything else.
+
+### Emulate building on readthedocs
+The `rtd` target is also available. It chains `make clean` with `sphinx-build`,
+setting some extra options and environment variables so that
+Sphinx simulates a readthedocs build as closely as possible.
+
+```shell
+make rtd
+```
+
+:::{important}
+This won't reinstall or update any dependencies, unlike on readthedocs, where
+all dependencies are installed in a clean env before each build.
+
+But it will execute all notebooks inside the `core_notebooks` folder,
+which are not executed by default. Executing the notebooks will add several minutes
+to the doc build, as there are 6 notebooks that take between 20 seconds and 5 minutes
+to run.
+:::
+
+## View the generated docs
+
+```shell
+make view
+```
+
+This will use Python's `webbrowser` module to open the generated website in your browser.
+The generated website is static, so there is no need to set up a server to preview it.
diff --git a/docs/source/contributing/index.md b/docs/source/contributing/index.md
index 09162a8751..71e275d5d3 100644
--- a/docs/source/contributing/index.md
+++ b/docs/source/contributing/index.md
@@ -93,6 +93,7 @@ pr_tutorial
implementing_distribution
build_docs
+docker_container
running_the_test_suite
review_pr_pymc_examples
using_gitpod
diff --git a/docs/source/contributing/jupyter_style.md b/docs/source/contributing/jupyter_style.md
index c002bee3b7..9dbfbd3def 100644
--- a/docs/source/contributing/jupyter_style.md
+++ b/docs/source/contributing/jupyter_style.md
@@ -6,7 +6,7 @@ All notebooks in pymc-examples must follow this to the letter, the style is
more permissive for the ones on pymc where not everything is available.
The documentation websites are generated by Sphinx, which uses
-{doc}`myst:index` and {doc}`myst-nb:index`
+{doc}`MyST <myst:index>` and {doc}`MyST-NB <myst-nb:index>`
to parse the notebooks. 
:::{tip}
@@ -45,9 +45,9 @@ This guide does not teach nor cover MyST extensively, only gives some opinionate
* If the output (or even code and output) of a cell is not necessary to follow the
notebook or it is very long and can break the flow of reading, consider hiding
- it with a {doc}`toggle button `
+ it with a {ref}`toggle button `

-* Consider using {ref}`myst:syntax/figures` to add captions to images used in the notebook.
+* Consider using {ref}`myst:syntax/md-figures` to add captions to images used in the notebook.

* Use the glossary whenever possible. If you use a term that is defined in the Glossary, link to it the first time that term appears in a significant manner. Use [this syntax](https://jupyterbook.org/content/content-blocks.html?highlight=glossary#glossaries) to add a term reference. [Link to glossary source](https://github.com/pymc-devs/pymc/blob/main/docs/source/glossary.md) where new terms should be added.
@@ -200,7 +200,7 @@ It must be notebook specific, for example its file name.
**Do not copy paste this and leave `notebook_name` unmodified**
:::

-Tags can be anything, but we ask you to try to use [existing tags](https://github.com/pymc-devs/pymc/wiki/Categories-and-Tags-for-PyMC-Examples)
+Tags can be anything, but we ask you to try to use {doc}`existing tags `
to keep the tag list from getting too long.

Each notebook should have one or two categories indicating:
@@ -214,8 +214,11 @@ Each notebook should have a one or two categories indicating:
- `explanation`
- `reference`

-Authors should list people who authored, adapted or updated the notebook. See {ref}`jupyter_authors`
-for more details.
+Authors should list people who authored, adapted or updated the notebook, excluding those
+who only re-executed a notebook with little to no code or wording changes.
+Only author names should be added here, as this is just metadata of the notebook;
+self-promotion links and details on the changes should be added in the "Authors" section.
+See {ref}`jupyter_authors` for more details.

## Extra dependencies
If the notebook uses libraries that are not PyMC dependencies, these extra dependencies should
@@ -266,7 +269,7 @@ Thus, notebooks with extra dependencies should:

:::{tab-item} extra_dependencies key
```{code-block} json
- :emphasize-lines: 19-21
+ :emphasize-lines: 19-23
{
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.7",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
- "substitutions": {
- "extra_dependencies": "bambi seaborn"
+ "myst": {
+ "substitutions": {
+ "extra_dependencies": "bambi seaborn"
+ }
}
}
```
:::

:::{tab-item} pip and conda specific keys
```{code-block} json
- :emphasize-lines: 19-22
+ :emphasize-lines: 19-24
{
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.7",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
- "substitutions": {
- "pip_dependencies": "graphviz",
- "conda_dependencies": "python-graphviz",
+ "myst": {
+ "substitutions": {
+ "pip_dependencies": "graphviz",
+ "conda_dependencies": "python-graphviz"
+ }
}
}
```
:::
@@ -359,7 +366,9 @@ except FileNotFoundError:
```

## pre-commit and code formatting
-We run some code-quality checks on our notebooks during Continuous Integration. The easiest way to make sure your notebook(s) pass the CI checks is using [pre-commit](https://github.com/pre-commit/pre-commit). You can install it with
+We run some code-quality checks on our notebooks during Continuous Integration.
+The easiest way to make sure your notebook(s) pass the CI checks is using [pre-commit](https://github.com/pre-commit/pre-commit).
+You can install it with

```bash
pip install -U pre-commit
```

and then enable it with

```bash
pre-commit install
```

-Then, the code-quality checks will run automatically whenever you commit any changes. To run the code-quality checks manually, you can do, e.g.:
+Then, the code-quality checks will run automatically whenever you commit any changes.
+To run the code-quality checks manually, you can do, e.g.:

```bash
pre-commit run --files notebook1.ipynb notebook2.ipynb
```
@@ -397,39 +407,31 @@ np.array(

(jupyter_authors)=
## Authorship and attribution
After the notebook content finishes, there should be an `## Authors` section with bullet points
-to provide attribution to the people who contributed to the the general pattern should be:
+to provide attribution to the people who contributed to the notebook. The general pattern should be:

```markdown
## Authors

-* <activity> by <authors> on <date> ([repo#PR](https://link-to.pr))
+* <activity> by <authors> in <date> ([repo#PR](https://link-to.pr))
```

-where `<activity>` must be one listed below, `<authors>` should be the name (multiple people allowed)
-which can be formatted as hyperlink to personal site or GitHub profile of the person,
+where `<authors>` should be the name (multiple people allowed) which can be formatted as
+a hyperlink to the personal site or GitHub profile of the person,
and `<date>` should preferably be month and year.

+The `<activity>` part should aim to be descriptive of the changes done, for example
+"updated", "re-executed", "authored" or "adapted", but it is not restricted to anything.

-authored
-: for notebooks created specifically for pymc-examples

-adapted
-: for notebooks adapted from other sources such as books or blogposts.
-  It will therefore follow a different structure than the example above
-  in order to include a link or reference to the original source:
-
-  ```markdown
-  ## Authors
-
-  Adapted from Alice's [blogpost](blog.alice.com) by Bob and Carol on ...
-  ```
-
-re-executed
-: for notebooks re-executed with a newer PyMC version without significant changes to the code.
-  It can also mention the PyMC version used to run the notebook.
-
-updated
-: for notebooks that have not only been re-executed but have also had significant updates to
-  their content (either code, explanations or both).
+Authors with significant contributions should also be included in the post metadata as indicated in
+{ref}`jupyter_style_first_cell`. There are no strict general guidelines on that; if in doubt,
+add yourself and ask reviewers for a second opinion. The main reason is that the authors
+section here at the bottom aims to be a log of all changes that happen to the notebook,
+whereas the metadata at the top is used to render the citation recommendation.
+So, for example, re-executing a notebook that requires no changes is a valuable contribution
+that will be logged both here in this section and on GitHub, but it does not meet the
+authorship criteria for citing. On the other hand, updating the wording and rendering of a
+notebook to make it clearer and friendlier to the reader should be reflected in both places,
+even if the notebook is not re-executed. 
some examples:


and

```markdown
## Authors

* Adapted from chapter 5 of Bayesian Data Analysis 3rd Edition {cite:p}`gelman2013bayesian`
-  by Demetri Pananos and Junpeng Lao on June, 2018 ([pymc#3054](https://github.com/pymc-devs/pymc/pull/3054))
-* Reexecuted by Ravin Kumar with PyMC 3.6 on March, 2019 ([pymc#3397](https://github.com/pymc-devs/pymc/pull/3397))
-* Reexecuted by Alex Andorra and Michael Osthege with PyMC 3.9 on June, 2020 ([pymc#3955](https://github.com/pymc-devs/pymc/pull/3955))
-* Updated by Raúl Maldonado 2021 ([pymc-examples#24](https://github.com/pymc-devs/pymc-examples/pull/24), [pymc-examples#45](https://github.com/pymc-devs/pymc-examples/pull/45) and [pymc-examples#147](https://github.com/pymc-devs/pymc-examples/pull/147))
+  by Demetri Pananos and Junpeng Lao in June, 2018 ([pymc#3054](https://github.com/pymc-devs/pymc/pull/3054))
+* Reexecuted by Ravin Kumar with PyMC 3.6 in March, 2019 ([pymc#3397](https://github.com/pymc-devs/pymc/pull/3397))
+* Reexecuted by Alex Andorra and Michael Osthege with PyMC 3.9 in June, 2020 ([pymc#3955](https://github.com/pymc-devs/pymc/pull/3955))
+* Updated by Raúl Maldonado in 2021 ([pymc-examples#24](https://github.com/pymc-devs/pymc-examples/pull/24), [pymc-examples#45](https://github.com/pymc-devs/pymc-examples/pull/45) and [pymc-examples#147](https://github.com/pymc-devs/pymc-examples/pull/147))
```

## References
diff --git a/docs/source/glossary.md b/docs/source/glossary.md
index 4e58cb7144..ba9f47dd46 100644
--- a/docs/source/glossary.md
+++ b/docs/source/glossary.md
@@ -24,7 +24,7 @@ Underdispersion
Bayesian Workflow
The Bayesian workflow involves all the steps needed for model building. This includes {term}`Bayesian inference` but also other tasks such as i) diagnoses of the quality of the inference, ii) model criticism, including evaluations of both model assumptions and model predictions, iii) comparison of models, not
-just for the purpose of model selection or model averaging but more importantly to better understand these models and iv) Preparation of the results for a particular audience. These non-inferencial tasks require both numerical and visual summaries to help practitioners analyse their models. And they are sometimes collectively known as [Exploratory Analysis of Bayesian Models](https://joss.theoj.org/papers/10.21105/joss.01143).
+ just for the purpose of model selection or model averaging but more importantly to better understand these models and iv) preparation of the results for a particular audience. These non-inferential tasks require both numerical and visual summaries to help practitioners analyse their models. They are sometimes collectively known as [Exploratory Analysis of Bayesian Models](https://joss.theoj.org/papers/10.21105/joss.01143).
- For a compact overview, see Bayesian statistics and modelling by van de Schoot, R., Depaoli, S., King, R. et al in Nat Rev Methods
- Primers 1, 1 (2021).
- For an in-depth overview, see Bayesian Workflow by Andrew Gelman, Aki Vehtari, Daniel Simpson, Charles C. Margossian, Bob Carpenter, Yuling Yao, Lauren Kennedy, Jonah Gabry, Paul-Christian Bürkner, Martin Modrák
- For an exercise-based material, see Think Bayes 2e: Bayesian Statistics Made Simple by Allen B. Downey
@@ -132,6 +132,7 @@ tensor_like
pt.as_tensor_variable([[1, 2.0], [0, 0]])
```
+
unnamed_distribution
PyMC distributions can be initialized directly (e.g. `pm.Normal`) or using the `.dist` classmethod (e.g. `pm.Normal.dist`). 
Distributions initialized with the first method are registered as model parameters and thus need to be given a name and be initialized within a model context. "unnamed_distributions" are distributions initialized with the second method. These are standalone distributions; they are not parameters in any model and can be used to draw samples from a distribution by itself or as parameters to other distributions, like mixtures or censored ones (a short sketch follows the `learn.md` diff below).
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 40bc6bfebf..02f1fa39c2 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -18,16 +18,17 @@ issues when using multiple conda channels (e.g. defaults and conda-forge).

## JAX sampling

-If you wish to enable sampling using the JAX backend via NumPyro, the following should also be installed:
+If you wish to enable sampling using the JAX backend via NumPyro,
+you need to install it manually as it is an optional dependency:

```console
-pip install numpyro
+conda install numpyro
```

-Similarly, to use BlackJAX for sampling it should be installed via `pip`:
+Similarly, to use the BlackJAX sampler instead:

```console
-pip install blackjax
+conda install blackjax
```

Note that JAX is not directly supported on Windows systems at the moment.
diff --git a/docs/source/learn.md b/docs/source/learn.md
index 01d19df209..575b95fb37 100644
--- a/docs/source/learn.md
+++ b/docs/source/learn.md
@@ -18,7 +18,7 @@ glossary

### Intermediate
- {ref}`pymc_overview` shows PyMC 4.0 code in action
-  - Example notebooks: {ref}`nb:index`
+  - Example notebooks: {doc}`nb:gallery`
- {ref}`GLM_linear`
- {ref}`posterior_predictive`
- Comparing models: {ref}`model_comparison`
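To make the glossary entry above concrete, here is a minimal sketch of the two initialization styles (an illustration assuming the PyMC v5 API, where `pm.draw` samples from standalone distributions):

```python
import pymc as pm

# Unnamed distribution: created with .dist, needs no name or model context
x = pm.Normal.dist(mu=0.0, sigma=1.0)
print(pm.draw(x, draws=3))  # draw samples from it directly

# Unnamed distributions can also parametrize other distributions,
# e.g. a censored normal
censored = pm.Censored.dist(pm.Normal.dist(0.0, 1.0), lower=-1.0, upper=1.0)

# Named distribution: registered on a model, so it requires a name
# and a model context
with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=1.0)
```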
diff --git a/docs/source/learn/core_notebooks/dimensionality.ipynb b/docs/source/learn/core_notebooks/dimensionality.ipynb
index 3fa8cbdd9d..0be522c86b 100644
--- a/docs/source/learn/core_notebooks/dimensionality.ipynb
+++ b/docs/source/learn/core_notebooks/dimensionality.ipynb
@@ -390,7 +390,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-    "It's important to understand how NumPy {ref}`broadcasting ` works. When you do something that is not valid, you will easily encounter this sort of errors:"
+    "It's important to understand how NumPy {ref}`broadcasting ` works. When you do something that is not valid, you will easily encounter this sort of error:"
 ]
},
{
diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
index 34c08aa211..6ea639d9f6 100644
--- a/pymc/distributions/distribution.py
+++ b/pymc/distributions/distribution.py
@@ -196,6 +196,7 @@ class SymbolicRandomVariable(OpFromGraph):
 """Tuple of (name, latex name) used for for pretty-printing variables of this type"""

 def __init__(self, *args, ndim_supp, **kwargs):
+ """Initialize a SymbolicRandomVariable instance."""
 self.ndim_supp = ndim_supp
 kwargs.setdefault("inline", True)
 super().__init__(*args, **kwargs)
diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py
index 4b89f24ce2..ade5f171b4 100644
--- a/pymc/distributions/mixture.py
+++ b/pymc/distributions/mixture.py
@@ -613,11 +613,13 @@ class ZeroInflatedPoisson:
 plt.ylabel('f(x)', fontsize=12)
 plt.legend(loc=1)
 plt.show()
+
 ======== ==========================
 Support :math:`x \in \mathbb{N}_0`
 Mean :math:`\psi\mu`
 Variance :math:`\mu + \frac{1-\psi}{\psi}\mu^2`
 ======== ==========================
+
 Parameters
 ----------
 psi : tensor_like of float
@@ -674,11 +676,13 @@ class ZeroInflatedBinomial:
 plt.ylabel('f(x)', fontsize=12)
 plt.legend(loc=1)
 plt.show()
+
 ======== ==========================
 Support :math:`x \in \mathbb{N}_0`
 Mean :math:`\psi n p`
 Variance :math:`(1-\psi) n p [1 - p(1 - \psi n)].`
 ======== ==========================
+
 Parameters
 ----------
 psi : tensor_like of float
@@ -750,6 +754,7 @@ def ZeroInfNegBinom(a, m, psi, x):
 plt.ylabel('f(x)', fontsize=12)
 plt.legend(loc=1)
 plt.show()
+
 ======== ==========================
 Support :math:`x \in \mathbb{N}_0`
 Mean :math:`\psi\mu`
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
index dac15dff2a..ba613967c1 100644
--- a/pymc/distributions/multivariate.py
+++ b/pymc/distributions/multivariate.py
@@ -752,10 +752,10 @@ class OrderedMultinomial:
 Useful for regression on ordinal data whose values range
 from 1 to K as a function of some predictor, :math:`\eta`, but
-    which are _aggregated_ by trial, like multinomial observations (in
-    contrast to `pm.OrderedLogistic`, which only accepts ordinal data
-    in a _disaggregated_ format, like categorical observations).
-    The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
+    which are _aggregated_ by trial, like multinomial observations (in
+    contrast to `pm.OrderedLogistic`, which only accepts ordinal data
+    in a _disaggregated_ format, like categorical observations).
+    The cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
 mapped to which of the K observed dependent variables. The number
 of cutpoints is K - 1. It is recommended that the cutpoints are
 constrained to be ordered.
diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py
index 2987ec444c..e792bed7f2 100644
--- a/pymc/distributions/shape_utils.py
+++ b/pymc/distributions/shape_utils.py
@@ -328,6 +328,7 @@ def change_dist_size(
 Examples
 --------
 .. code-block:: python
+
 x = Normal.dist(shape=(2, 3))
 new_x = change_dist_size(x, new_size=(5, 3), expand=False)
 assert new_x.eval().shape == (5, 3)
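The snippet in this last hunk is the docstring example itself; for reference, here is a self-contained version (a sketch assuming `change_dist_size` is imported from `pymc.distributions.shape_utils`, the module this hunk touches, and following the function's documented semantics for `expand`):

```python
import pymc as pm
from pymc.distributions.shape_utils import change_dist_size

# Resize an unnamed distribution instead of re-creating it
x = pm.Normal.dist(shape=(2, 3))
new_x = change_dist_size(x, new_size=(5, 3), expand=False)
assert new_x.eval().shape == (5, 3)

# With expand=True, the new size is prepended to the existing shape
expanded = change_dist_size(x, new_size=(5,), expand=True)
assert expanded.eval().shape == (5, 2, 3)
```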