diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ddec88129..8b81d83a8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: conda-incubator/setup-miniconda@v2 with: - channels: conda-forge,defaults + channels: conda-forge, defaults channel-priority: true python-version: ${{ matrix.python-version }} auto-update-conda: true @@ -30,6 +30,7 @@ jobs: - name: Install dev environment & bambi shell: bash -l {0} run: | + conda install -c conda-forge python-graphviz conda install pip pip install -r requirements.txt pip install -r requirements-dev.txt diff --git a/bambi/backends/pymc.py b/bambi/backends/pymc.py index 969946142..fb0494fa0 100644 --- a/bambi/backends/pymc.py +++ b/bambi/backends/pymc.py @@ -4,8 +4,8 @@ import theano.tensor as tt import pymc3 as pm +from bambi import version from bambi.priors import Prior -import bambi.version as version from .base import BackEnd from .utils import probit, cloglog @@ -55,43 +55,70 @@ def build(self, spec): # pylint: disable=arguments-differ self.model = pm.Model(coords=coords) noncentered = spec.noncentered + ## Add common effects + # Common effects have at most ONE coord. + # If ndim > 1 we're certain there's more than one column (see squeeze) with self.model: self.mu = 0.0 - for term in spec.terms.values(): - data = term.data + for term in spec.common_terms.values(): + data = term.data.squeeze() label = term.name - name = term.prior.name + dist = term.prior.name args = term.prior.args - shape = term.data.shape[1] - - if term.group_specific: - # Group-specific terms always have pymc_coords, at least for the group. + if term.pymc_coords: dims = list(term.pymc_coords.keys()) - coef = self.build_group_specific_distribution( - name, label, noncentered, dims=dims, **args - ) - # term.predictor.shape[1] is larger than one when the expression is a - # categorical variable with more than one level. 
- # This is not the most beautiful alternative, but it resulted to be the - # fastest. Doing matrix multiplication, pm.math.dot(data, coef), is slower. - coef_ = coef[term.group_index] - if term.predictor.shape[1] > 1: - for col in range(term.predictor.shape[1]): - self.mu += coef_[:, col][:, None] * term.predictor[:, col] - else: - self.mu += coef_[:, None] * term.predictor + coef = self.build_common_distribution(dist, label, dims=dims, **args) + else: + shape = () if data.ndim == 1 else data.shape[1] + coef = self.build_common_distribution(dist, label, shape=shape, **args) + + if data.ndim == 1: + self.mu += data * coef + else: + self.mu += pm.math.dot(data, coef) + + ## Add group-specific effects + # Group-specific effects always have pymc_coords. At least for the group. + # The loop through predictor columns is not the most beautiful alternative. + # But it's the fastest. Doing matrix multiplication, pm.math.dot(data, coef), is slower. + + # Add group specific terms that have prior for their correlation matrix + with self.model: + for group, eta in spec.priors_cor.items(): + # pylint: disable=protected-access + terms = [spec.terms[name] for name in spec._get_group_specific_groups()[group]] + self.mu += add_lkj(terms, eta) + + # Add group specific terms that don't have a prior for their correlation matrix + terms = [ + term + for term in spec.group_specific_terms.values() + if term.name.split("|")[1] not in spec.priors_cor + ] + with self.model: + for term in terms: + label = term.name + dist = term.prior.name + args = term.prior.args + predictor = term.predictor.squeeze() + dims = list(term.pymc_coords.keys()) + + coef = self.build_group_specific_distribution( + dist, label, noncentered, dims=dims, **args + ) + coef = coef[term.group_index] + + if predictor.ndim > 1: + for col in range(predictor.shape[1]): + self.mu += coef[:, col] * predictor[:, col] else: - if term.pymc_coords: - # Common effects have at most ONE coord. 
- dims = list(term.pymc_coords.keys()) - coef = self.build_common_distribution(name, label, dims=dims, **args) - else: - shape = () if shape == 1 else shape - coef = self.build_common_distribution(name, label, shape=shape, **args) - self.mu += pm.math.dot(data, coef)[:, None] - - self._build_response(spec) - self.spec = spec + self.mu += coef * predictor + + # Build response distribution + with self.model: + self.build_response(spec) + + self.spec = spec # pylint: disable=arguments-differ, inconsistent-return-statements def run( @@ -146,6 +173,13 @@ def run( getattr(idata, group).attrs["modeling_interface"] = "bambi" getattr(idata, group).attrs["modeling_interface_version"] = version.__version__ + # Drop variables and dimensions associated with LKJ prior + vars_to_drop = [var for var in idata.posterior.var() if var.startswith("_LKJ")] + dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")] + + idata.posterior = idata.posterior.drop_vars(vars_to_drop) + idata.posterior = idata.posterior.drop_dims(dims_to_drop) + # Reorder coords # pylint: disable=protected-access coords_original = list(self.spec._get_pymc_coords().keys()) @@ -153,6 +187,7 @@ def run( for coord in coords_group: coords_original.remove(coord) coords_new = ["chain", "draw"] + coords_original + coords_group + idata.posterior = idata.posterior.transpose(*coords_new) self.fit = True @@ -174,7 +209,7 @@ def build_common_distribution(self, dist, label, **kwargs): return distribution(label, **kwargs) def build_group_specific_distribution(self, dist, label, noncentered, **kwargs): - """Build and return a PyMC3 Distribution.""" + """Build and return a PyMC3 Distribution for a group specific term.""" dist = self.get_distribution(dist) if "dims" in kwargs: group_dim = [dim for dim in kwargs["dims"] if dim.endswith("_group_expr")] @@ -186,32 +221,16 @@ def build_group_specific_distribution(self, dist, label, noncentered, **kwargs): kwargs = { k: self.expand_prior_args(k, v, label, 
noncentered) for (k, v) in kwargs.items() } - # Non-centered parameterization for hyperpriors if noncentered and has_hyperprior(kwargs): old_sigma = kwargs["sigma"] _offset = pm.Normal(label + "_offset", mu=0, sigma=1, dims=kwargs["dims"]) return pm.Deterministic(label, _offset * old_sigma, dims=kwargs["dims"]) - return dist(label, **kwargs) - def get_distribution(self, dist): - """Return a PyMC3 distribution.""" - if isinstance(dist, str): - if hasattr(pm, dist): - dist = getattr(pm, dist) - elif dist in self.dists: - dist = self.dists[dist] - else: - raise ValueError( - f"The Distribution {dist} was not found in PyMC3 or the PyMC3BackEnd." - ) - return dist - - def _build_response(self, spec): + def build_response(self, spec): """Build and return a response distribution.""" - - data = spec.response.data + data = spec.response.data.squeeze() name = spec.response.name prior = spec.family.prior link = spec.family.link @@ -233,8 +252,21 @@ def _build_response(self, spec): return dist(name, **kwargs) + def get_distribution(self, dist): + """Return a PyMC3 distribution.""" + if isinstance(dist, str): + if hasattr(pm, dist): + dist = getattr(pm, dist) + elif dist in self.dists: + dist = self.dists[dist] + else: + raise ValueError( + f"The Distribution {dist} was not found in PyMC3 or the PyMC3BackEnd." + ) + return dist + def expand_prior_args(self, key, value, label, noncentered, **kwargs): - # Inspect all args in case we have hyperparameters + # Inspect all args in case we have hyperparameters. # kwargs are used to pass 'dims' for group specific terms. if isinstance(value, Prior): return self.build_group_specific_distribution( @@ -279,9 +311,92 @@ def _laplace(model): def has_hyperprior(kwargs): """Determines if a Prior has an hyperprior""" - return ( "sigma" in kwargs and "observed" not in kwargs and isinstance(kwargs["sigma"], pm.model.TransformedRV) ) + + +def add_lkj(terms, eta=1): + """Add correlated prior for group-specific effects. 
+ + This function receives a list of group-specific terms that share their `grouper`, constructs + a multivariate Normal prior with LKJ prior on the correlation matrix, and adds the necessary + variables to the model. It uses a non-centered parametrization. + + Parameters + ---------- + terms: list + A list of terms that share a common grouper (i.e. ``1|Group`` and ``Variable|Group`` in + formula notation). + eta: num + The value for the eta parameter in the LKJ distribution. + + Returns + ------- + mu: + The contribution to the linear predictor of the group-specific terms in ``terms``. + """ + + # Parameters + # grouper: The name of the grouper. + # rows: Sum of the number of columns in all the "Xi" matrices for a given grouper. + # Same as the order of L + # cols: Number of groups in the grouper variable + mu = 0 + grouper = terms[0].name.split("|")[1] + rows = int(np.sum([term.predictor.shape[1] for term in terms])) + cols = int(terms[0].grouper.shape[1]) # not the most beautiful, but works + + # Construct sigma + # Horizontally stack the sigma values for all the hyperpriors + sigma = np.hstack([term.prior.args["sigma"].args["sigma"] for term in terms]) + + # Reconstruct the hyperprior for the standard deviations, using one variable + sigma = pm.HalfNormal.dist(sigma=sigma, shape=rows) + + # Obtain Cholesky factor for the covariance + lkj_decomp, corr, sigma = pm.LKJCholeskyCov( # pylint: disable=unused-variable + "_LKJCholeskyCov_" + grouper, + n=rows, + eta=eta, + sd_dist=sigma, + compute_corr=True, + store_in_trace=False, + ) + + coefs_offset = pm.Normal("_LKJ_" + grouper + "_offset", mu=0, sigma=1, shape=(rows, cols)) + coefs = tt.dot(lkj_decomp, coefs_offset).T + + ## Separate group-specific terms + start = 0 + for term in terms: + label = term.name + dims = list(term.pymc_coords.keys()) + predictor = term.predictor.squeeze() + delta = term.predictor.shape[1] + + if delta == 1: + idx = start + else: + idx = slice(start, start + delta) + + # Add prior 
for the parameter + coef = pm.Deterministic(label, coefs[:, idx], dims=dims) + coef = coef[term.group_index] + + # Add standard deviation of the hyperprior distribution + group_dim = [dim for dim in dims if dim.endswith("_group_expr")] + pm.Deterministic(label + "_sigma", sigma[idx], dims=group_dim) + + # Account for the contribution of the term to the linear predictor + if predictor.ndim > 1: + for col in range(predictor.shape[1]): + mu += coef[:, col] * predictor[:, col] + else: + mu += coef * predictor + start += delta + + # TO DO: Add correlations + return mu diff --git a/bambi/models.py b/bambi/models.py index 198c2afa5..3b3ac7d1e 100644 --- a/bambi/models.py +++ b/bambi/models.py @@ -1,6 +1,5 @@ # pylint: disable=no-name-in-module # pylint: disable=too-many-lines -import re import logging from copy import deepcopy @@ -14,7 +13,7 @@ from formulae import design_matrices from .backends import PyMC3BackEnd -from .priors import Prior, PriorFactory, PriorScaler, Family +from .priors import Prior, PriorFactory, PriorScaler, PriorScalerMLE, Family from .terms import ResponseTerm, Term, GroupSpecificTerm from .utils import listify, extract_family_prior, link_match_family from .version import __version__ @@ -67,9 +66,16 @@ class Model: dictionary containing named distributions, families, and terms (see the documentation in ``priors.PriorFactory`` for details), or the name of a JSON file containing the same information. + automatic_priors: str + An optional specification to compute/scale automatic priors. ``"default"`` means to use + a method inspired on the R rstanarm library. ``"mle"`` means to use old default priors in + Bambi that rely on maximum likelihood estimations obtained via the statsmodels library. noncentered : bool If ``True`` (default), uses a non-centered parameterization for normal hyperpriors on grouped parameters. If ``False``, naive (centered) parameterization is used. 
+ priors_cor = dict + The value of eta in the prior for the correlation matrix of group-specific terms. + Keys in the dictionary indicate the groups, and values indicate the value of eta. taylor : int Order of Taylor expansion to use in approximate variance when constructing the default priors. Should be between 1 and 13. Lower values are less accurate, tending to undershoot @@ -90,7 +96,9 @@ def __init__( dropna=False, auto_scale=True, default_priors=None, + automatic_priors="default", noncentered=True, + priors_cor=None, taylor=None, ): # attributes that are set later @@ -106,6 +114,7 @@ def __init__( self.response = None # _add_response() self.family = None # _add_response() self.backend = None # _set_backend() + self.priors_cor = {} # _add_priors_cor() self.auto_scale = auto_scale self.dropna = dropna @@ -139,7 +148,9 @@ def __init__( priors = {} else: priors = deepcopy(priors) + self.default_priors = PriorFactory(default_priors) + self.automatic_priors = automatic_priors # Obtain design matrices and related objects. na_action = "drop" if dropna else "error" @@ -150,17 +161,16 @@ def __init__( raise ValueError("Can't instantiate a model without a model formula.") if self._design.response is not None: - _family = family.name if isinstance(family, Family) else family - priors_ = extract_family_prior(family, priors) - if priors_ and self._design.common: - conflicts = [name for name in priors_ if name in self._design.common.terms_info] - if conflicts: + family_prior = extract_family_prior(family, priors) + if family_prior and self._design.common: + conflict = [name for name in family_prior if name in self._design.common.terms_info] + if conflict: raise ValueError( - f"The prior name for {', '.join(conflicts)} conflicts with the name of a " + f"The prior name for {', '.join(conflict)} conflicts with the name of a " "parameter in the response distribution.\n" "Please rename the term(s) to prevent an unexpected behaviour." 
) - self._add_response(self._design.response, family, link, priors_) + self._add_response(self._design.response, family, link, family_prior) else: raise ValueError( "No outcome variable is set! " @@ -173,13 +183,15 @@ def __init__( if self._design.group: self._add_group_specific(self._design.group, priors) + if priors_cor: + self._add_priors_cor(priors_cor) + # Build priors self._build_priors() def fit( self, omit_offsets=True, - backend="pymc", **kwargs, ): """Fit the model using the specified backend. @@ -189,19 +201,10 @@ def fit( omit_offsets: bool Omits offset terms in the ``InferenceData`` object when the model includes group specific effects. Defaults to ``True``. - backend : str - The name of the backend to use. Currently only ``'pymc'`` backend is supported. """ - # There's a problem if we pretend to move .build() to instantiation - # We would have to assume the backend, which is fine now since we're using PyMC3. - # Nevermind, prior scaling can be outside .build() bc the backend is not needed!! - if backend is None: - backend = "pymc" if self._backend_name is None else self._backend_name - - if not self.built or backend != self._backend_name: - self.build(backend) - self._backend_name = backend + if not self.built: + self.build() # Tell user which event is being modeled if self.family.name == "bernoulli": @@ -213,32 +216,17 @@ def fit( return self.backend.run(omit_offsets=omit_offsets, **kwargs) - def build(self, backend="pymc"): + def build(self): """Set up the model for sampling/fitting. Performs any steps that require access to all model terms (e.g., scaling priors on each term), then calls the backend's ``build()`` method. - - Parameters - ---------- - backend : str - The name of the backend to use for model fitting. - Currently only ``'pymc'`` is supported. """ - - # Check for backend - if backend is None: - if self._backend_name is None: - raise ValueError( - "No backend was passed or set in the Model; did you forget to call fit()?" 
- ) - backend = self._backend_name - - self._set_backend(backend) + self.backend = PyMC3BackEnd() self.backend.build(self) self.built = True - def set_priors(self, priors=None, common=None, group_specific=None, match_derived_names=True): + def set_priors(self, priors=None, common=None, group_specific=None): """Set priors for one or more existing terms. Parameters @@ -252,73 +240,55 @@ def set_priors(self, priors=None, common=None, group_specific=None, match_derive A prior specification to apply to all common terms included in the model. group_specific : Prior, int, float or str A prior specification to apply to all group specific terms included in the model. - match_derived_names : bool - If ``True``, the specified prior(s) will be applied not only to terms that match the - keyword exactly, but to the levels of group specific effects that were derived from the - original specification with the passed name. For example, - ``priors={'condition|subject':0.5}`` would apply the prior to the terms with names - ``'1|subject'``, ``'condition[T.1]|subject'``, and so on. If ``False``, an exact - match is required for the prior to be applied. """ # save arguments to pass to _set_priors() at build time - kwargs = dict( - zip( - ["priors", "common", "group_specific", "match_derived_names"], - [priors, common, group_specific, match_derived_names], - ) - ) + kwargs = dict(zip(["priors", "common", "group_specific"], [priors, common, group_specific])) self._added_priors.update(kwargs) # After updating, we need to rebuild priors. # There is redundancy here, so there's place for performance improvements. 
self._build_priors() self.built = False - def _set_backend(self, backend): - backend = backend.lower() - if backend.startswith("pymc"): - self.backend = PyMC3BackEnd() - else: - raise ValueError("At the moment, only the PyMC3 backend is supported.") - - self._backend_name = backend - def _build_priors(self): """Carry out all operations related to the construction and/or scaling of priors.""" - # set custom priors + # Set custom priors that have been passed via `Model.set_priors()` self._set_priors(**self._added_priors) - # prepare all priors - for name, term in self.terms.items(): - type_ = ( - "intercept" - if name == "Intercept" - else "group_specific" - if self.terms[name].group_specific - else "common" - ) + # Prepare all priors + for term in self.terms.values(): + if term.group_specific: + type_ = "group_specific" + elif term.type == "intercept": + type_ = "intercept" + else: + type_ = "common" term.prior = self._prepare_prior(term.prior, type_) - # throw informative error message if any categorical predictors have 1 category - num_cats = [x.data.size for x in self.common_terms.values()] - if any(np.array(num_cats) == 0): - raise ValueError("At least one categorical predictor contains only 1 category!") - - # only set priors if there is at least one term in the model - if self.terms: - # Get and scale default priors if none are defined yet - if self.taylor is not None: - taylor = self.taylor + # Scale priors if there is at least one term in the model and auto_scale is True + if self.terms and self.auto_scale: + method = self.automatic_priors + if method == "default": + scaler = PriorScaler(self) + elif method == "mle": + if self.taylor is not None: + taylor = self.taylor + else: + taylor = 5 if self.family.name == "gaussian" else 1 + scaler = PriorScalerMLE(self, taylor=taylor) else: - taylor = 5 if self.family.name == "gaussian" else 1 - scaler = PriorScaler(self, taylor=taylor) + raise ValueError( + f"{method} is not a valid method for default priors." 
"Use 'default' or 'mle'." + ) self.scaler = scaler - scaler.scale() + self.scaler.scale() - def _set_priors(self, priors=None, common=None, group_specific=None, match_derived_names=True): + def _set_priors(self, priors=None, common=None, group_specific=None): """Internal version of ``set_priors()``, with same arguments. Runs during ``Model._build_priors()``. """ + # First, it constructs a `targets` dict where it stores key-value (name-prior) pairs that + # are going to be updated. Finally, the update is done in the last for loop in this method. targets = {} if common is not None: @@ -328,52 +298,43 @@ def _set_priors(self, priors=None, common=None, group_specific=None, match_deriv targets.update({name: group_specific for name in self.group_specific_terms.keys()}) if priors is not None: - # Update priors related to nuisance parameters of response distribution - priors_ = extract_family_prior(self.family, priors) - if priors_: - # Remove keys passed to the response. - for key in priors_: - priors.pop(key) - self.response.prior.args.update(priors_) + # Prepare priors for response nuisance parameters + family_prior = extract_family_prior(self.family, priors) + if family_prior: + self.response.prior.args.update(family_prior) + self.response.prior.auto_scale = False # Prepare priors for explanatory terms. - for k, prior in priors.items(): - for name in listify(k): - term_names = list(self.terms.keys()) - msg = f"No terms in model match {name}." - if name not in term_names: - terms = self._match_derived_terms(name) - if not match_derived_names or terms is None: - raise ValueError(msg) - for term in terms: - targets[term.name] = prior - else: - targets[name] = prior + for names, prior in priors.items(): + # In case we have tuple-keys, we loop through each of them. + for name in listify(names): + if name not in list(self.terms.keys()): + raise ValueError(f"No terms in model match {name}.") + targets[name] = prior # Set priors for explanatory terms. 
for name, prior in targets.items(): self.terms[name].prior = prior - def _prepare_prior(self, prior, _type): + def _prepare_prior(self, prior, type_): """Helper function to correctly set default priors, auto scaling, etc. Parameters ---------- - prior : Prior object, or float, or None. - _type : string + prior : Prior, float, or None. + type_ : string accepted values are: ``'intercept'``, ``'common'``, or ``'group_specific'``. """ + if prior is None and not self.auto_scale: - prior = self.default_priors.get(term=_type + "_flat") + prior = self.default_priors.get(term=type_ + "_flat") if isinstance(prior, Prior): - prior._auto_scale = False # pylint: disable=protected-access + prior.auto_scale = False else: _scale = prior - prior = self.default_priors.get(term=_type) + prior = self.default_priors.get(term=type_) prior.scale = _scale - if prior.scale is not None: - prior._auto_scale = False # pylint: disable=protected-access return prior def _add_response(self, response, family="gaussian", link=None, priors=None): @@ -412,32 +373,23 @@ def _add_response(self, response, family="gaussian", link=None, priors=None): elif not isinstance(family, Family): raise ValueError("family must be a string or a Family object.") - self.family = family - # Override family's link if another is explicitly passed if link is not None: if link_match_family(link, family.name): - self.family._set_link(link) # pylint: disable=protected-access + family._set_link(link) # pylint: disable=protected-access else: raise ValueError(f"Link {link} cannot be used with family {family.name}") - prior = self.family.prior - - # not None when user passes priors for nuisance parameters, either for built-in familes or - # for custom families. 
+ # Update nuisance parameters if priors is not None: - prior.args.update(priors) + family.prior.args.update(priors) + family.prior.auto_scale = False - if self.family.name == "gaussian": - if priors is None: - prior.update( - sigma=Prior("HalfStudentT", nu=4, sigma=np.std(response.design_vector)) - ) + if response.refclass is not None and family.name != "bernoulli": + raise ValueError("Index notation for response is only available for 'bernoulli' family") - if response.refclass is not None and self.family.name != "bernoulli": - raise ValueError("Index notation for response only available for 'bernoulli' family") - - self.response = ResponseTerm(response, prior, self.family.name) + self.family = family + self.response = ResponseTerm(response, family.prior, family.name) self.built = False def _add_common(self, common, priors): @@ -493,33 +445,14 @@ def _add_group_specific(self, group, priors): prior = priors.pop(name, priors.get("group_specific", None)) self.terms[name] = GroupSpecificTerm(name, term, data, prior) - def _match_derived_terms(self, name): - """Return all (group_specific) terms whose named are derived from the specified string. - - For example, ``'condition|subject'`` should match the terms with names ``'1|subject'``, - ``'condition[T.1]|subject'``, and so on. - Only works for strings with grouping operator ``('|')``. 
- """ - if "|" not in name: - return None - - patt = r"^([01]+)*[\s\+]*([^\|]+)*\|(.*)" - intcpt, pred, grpr = re.search(patt, name).groups() - intcpt = f"1|{grpr}" - if not pred: - return [self.terms[intcpt]] if intcpt in self.terms else None - - source = f"{pred}|{grpr}" - found = [ - t - for (n, t) in self.terms.items() - if n == intcpt or re.sub(r"(\[.*?\])", "", n) == source - ] - # If only the intercept matches, return None, because we want to err - # on the side of caution and not consider '1|subject' to be a match for - # 'condition|subject' if no slopes are found (e.g., the intercept could - # have been set by some other specification like 'gender|subject'). - return found if found and (len(found) > 1 or found[0].name != intcpt) else None + def _add_priors_cor(self, priors): + # priors: dictionary. names are groups, values are the "eta" in the lkj prior + groups = self._get_group_specific_groups() + for group in groups: + if group in priors: + self.priors_cor[group] = priors[group] + else: + raise KeyError(f"The name {group} is not a group in any group-specific term.") def plot_priors( self, @@ -670,11 +603,10 @@ def prior_predictive(self, draws=500, var_names=None, omit_offsets=True, random_ # pps_ keys are not in the same order as `var_names` because `var_names` is converted # to set within pm.sample_prior_predictive() pps = {name: pps_[name] for name in var_names} - response_name = self.response.name if response_name in pps: - prior_predictive = {response_name: np.moveaxis(pps.pop(response_name), 2, 0)} + prior_predictive = {response_name: pps.pop(response_name)} observed_data = {response_name: self.response.data.squeeze()} else: prior_predictive = {} @@ -813,22 +745,47 @@ def _get_pymc_coords(self): coords.update(**term.pymc_coords) return coords + def _get_group_specific_groups(self): + groups = {} + for term_name in self.group_specific_terms: + factor = term_name.split("|")[1] + if factor not in groups: + groups[factor] = [term_name] + else: + 
groups[factor].append(term_name) + return groups + def __str__(self): - priors = [f" {term.name} ~ {term.prior}" for term in self.terms.values()] - # Priors for nuisance parameters, i.e., standard deviation in normal linear model - priors_extra_params = [ - f" {k} ~ {v}" + priors = "" + priors_common = [f" {t.name} ~ {t.prior}" for t in self.common_terms.values()] + priors_group = [f" {t.name} ~ {t.prior}" for t in self.group_specific_terms.values()] + + # Prior for the correlation matrix in group-specific terms + priors_cor = [f" {k} ~ LKJCorr({v})" for k, v in self.priors_cor.items()] + + # Priors for auxiliary parameters, e.g., standard deviation in normal linear model + priors_aux = [ + f" {k} ~ {v}" for k, v in self.family.prior.args.items() if k not in ["observed", self.family.parent] ] - priors += priors_extra_params + + if priors_common: + priors += "\n".join([" Common-level effects", *priors_common]) + "\n\n" + if priors_group: + priors += "\n".join([" Group-level effects", *priors_group]) + "\n\n" + if priors_cor: + priors += "\n".join([" Group-level correlation", *priors_cor]) + "\n\n" + if priors_aux: + priors += "\n".join([" Auxiliary parameters", *priors_aux]) + "\n\n" + str_list = [ f"Formula: {self.formula}", f"Family name: {self.family.name.capitalize()}", f"Link: {self.family.link}", f"Observations: {self.response.data.shape[0]}", "Priors:", - "\n".join(priors), + priors, ] if self.backend and self.backend.fit: extra_foot = "------\n" diff --git a/bambi/priors/__init__.py b/bambi/priors/__init__.py index c0a205248..e4ca7e0ac 100644 --- a/bambi/priors/__init__.py +++ b/bambi/priors/__init__.py @@ -1,4 +1,5 @@ from .priors import Family, Prior, PriorFactory -from .scaler import PriorScaler +from .scaler_mle import PriorScalerMLE +from .scaler_default import PriorScaler -__all__ = ["Prior", "PriorFactory", "PriorScaler", "Family"] +__all__ = ["Family", "Prior", "PriorFactory", "PriorScaler", "PriorScalerMLE"] diff --git 
a/bambi/priors/config/priors.json b/bambi/priors/config/priors.json index a6c6fa146..db823388a 100644 --- a/bambi/priors/config/priors.json +++ b/bambi/priors/config/priors.json @@ -1,170 +1,183 @@ { "terms": { - "intercept": "#normal", - "common": "#normal", - "group_specific": [ - "#normal", - { + "intercept": { + "dist": "#normal" + }, + "common": { + "dist": "#normal" + }, + "group_specific": { + "dist": "#normal", + "hyper": { "sigma": "#halfnormal" } - ], - "intercept_flat": "#flat", - "common_flat": "#flat", - "group_specific_flat": [ - "#normal", - { + }, + "intercept_flat": { + "dist": "#flat" + }, + "common_flat": { + "dist": "#flat" + }, + "group_specific_flat": { + "dist": "#normal", + "hyper": { "sigma": "#halfflat" } - ] + } }, "families": { "gaussian": { - "dist": [ - "#normal", - { + "dist": { + "dist": "#normal", + "args": { "sigma": "#halfnormal" } - ], + }, "link": "identity", "parent": "mu" }, "bernoulli": { - "dist": [ - "#bernoulli", - { + "dist": { + "dist": "#bernoulli", + "args": { "p": "#beta" } - ], + }, "link": "logit", "parent": "p" }, "poisson": { - "dist": [ - "#poisson", - { + "dist": { + "dist": "#poisson", + "args": { "mu": "#halfcauchy" } - ], + }, "link": "log", "parent": "mu" }, "t": { - "dist": [ - "#t", - { + "dist": { + "dist": "#t", + "args": { "lam": "#halfcauchy" } - ], + }, "link": "identity", "parent": "mu" }, "negativebinomial": { - "dist": [ - "#negativebinomial", - { + "dist": { + "dist": "#negativebinomial", + "args": { "alpha": "#halfcauchy" } - ], + }, "link": "log", "parent": "mu" }, "gamma": { - "dist": [ - "#gamma", { + "dist": { + "dist": "#gamma", + "args": { "alpha": "#halfcauchy" } - ], + }, "link": "inverse", "parent": "mu" }, "wald": { - "dist": [ - "#wald", { + "dist": { + "dist": "#wald", + "args": { "lam": "#halfcauchy" } - ], + }, "link": "inverse_squared", "parent": "mu" } }, "dists": { - "normal": [ - "Normal", - { + "normal": { + "name": "Normal", + "args": { "mu": 0, "sigma": 1 } - ], - "cauchy": [ 
- "Cauchy", - { + }, + "cauchy": { + "name": "Cauchy", + "args": { "alpha": 0, "beta": 1 } - ], - "halfcauchy": [ - "HalfCauchy", - { + }, + "halfcauchy": { + "name": "HalfCauchy", + "args": { "beta": 1 } - ], - "halfnormal": [ - "HalfNormal", - { + }, + "halfnormal": { + "name": "HalfNormal", + "args": { "sigma": 1 } - ], - "flat": [ - "Flat", - {} - ], - "halfflat": [ - "HalfFlat", - {} - ], - "beta": [ - "Beta", - { + }, + "flat": { + "name": "Flat", + "args": {} + }, + "halfflat": { + "name": "HalfFlat", + "args": {} + }, + "beta": { + "name": "Beta", + "args": { "alpha": 1, "beta": 1 } - ], - "poisson": [ - "Poisson", - { + }, + "poisson": { + "name": "Poisson", + "args": { "mu": 1 } - ], - "t": [ - "StudentT", - { + }, + "t": { + "name": "StudentT", + "args": { "lam": 1, "nu": 1 } - ], - "bernoulli": [ - "Bernoulli", - { + }, + "bernoulli": { + "name": "Bernoulli", + "args": { "p": 0.5 } - ], - "negativebinomial": [ - "NegativeBinomial", { + }, + "negativebinomial": { + "name": "NegativeBinomial", + "args": { "alpha": 1, "mu": 1 } - ], - "gamma": [ - "Gamma", { + }, + "gamma": { + "name": "Gamma", + "args": { "alpha": 2, "beta": 2 } - ], - "wald": [ - "Wald", { + }, + "wald": { + "name": "Wald", + "args": { "mu": 1, "lam": 1 } - ] + } } } \ No newline at end of file diff --git a/bambi/priors/priors.py b/bambi/priors/priors.py index 4711751b2..1af256d2b 100644 --- a/bambi/priors/priors.py +++ b/bambi/priors/priors.py @@ -2,7 +2,6 @@ import json import re - from copy import deepcopy from os.path import dirname, join @@ -73,7 +72,7 @@ def __str__(self): else: priors_str = ",\n ".join( [ - f"{k}: {np.round_(v, 8)}" if not isinstance(v, Prior) else f"{k}: {v}" + f"{k}: {np.round_(v, 4)}" if not isinstance(v, Prior) else f"{k}: {v}" for k, v in self.prior.args.items() if k not in ["observed", self.parent] ] @@ -100,7 +99,7 @@ class Prior: def __init__(self, name, scale=None, **kwargs): self.name = name - self._auto_scale = True + self.auto_scale = True self.scale = 
scale self.args = {} self.update(**kwargs) @@ -132,7 +131,7 @@ def __eq__(self, other): def __str__(self): args = ", ".join( [ - f"{k}: {np.round_(v, 8)}" if not isinstance(v, Prior) else f"{k}: {v}" + f"{k}: {np.round_(v, 4)}" if not isinstance(v, Prior) else f"{k}: {v}" for k, v in self.args.items() ] ) @@ -203,26 +202,32 @@ def __init__(self, defaults=None, dists=None, terms=None, families=None): self.terms = defaults["terms"] self.families = defaults["families"] - def _get_prior(self, spec, **kwargs): - if isinstance(spec, str): - spec = re.sub(r"^\#", "", spec) - return self._get_prior(self.dists[spec]) - elif isinstance(spec, (list, tuple)): - name, args = spec - if name.startswith("#"): - name = re.sub(r"^\#", "", name) - prior = self._get_prior(self.dists[name]) - else: - prior = Prior(name, **kwargs) - args = {k: self._get_prior(v) for (k, v) in args.items()} + def _get_family(self, family): + """Returns a default prior for a family specified by name.""" + config = self.families[family] + dist = config["dist"] + prior = self._get_dist(dist["dist"]) + args = {k: self._get_dist(v) for (k, v) in dist["args"].items()} + prior.update(**args) + return Family(family, prior, config["link"], config["parent"]) + + def _get_term(self, term): + config = self.terms[term] + prior = self._get_dist(config["dist"]) + if "hyper" in config: + args = {k: self._get_dist(v) for (k, v) in config["hyper"].items()} prior.update(**args) - return prior - else: - return spec + return prior + + def _get_dist(self, name): + if name.startswith("#"): + name = re.sub(r"^\#", "", name) + dist = self.dists[name] + return Prior(dist["name"], **dist["args"]) - # pylint: disable=inconsistent-return-statements def get(self, dist=None, term=None, family=None): """Retrieve default prior for a named distribution, term type, or family. + Only one of ``'dist'``, ``'term'`` or ``'family'`` can be passed. 
        Parameters
        ----------
@@ -238,17 +243,28 @@ def get(self, dist=None, term=None, family=None):
             ``'gama'``, ``'wald'``, or ``'negativebinomial'``.
         """
+        # One, and only one, of 'dist', 'term' or 'family' must be set.
+        args_count = sum(arg is not None for arg in [dist, term, family])
+        if args_count == 0:
+            raise ValueError("One of 'dist', 'term' or 'family' is required.")
+        if args_count > 1:
+            raise ValueError("Only one of 'dist', 'term', and 'family' in the same call.")
+
         if dist is not None:
             if dist not in self.dists:
                 raise ValueError(f"{dist} is not a valid distribution name.")
-            return self._get_prior(self.dists[dist])
+            prior = self._get_dist(dist)
         elif term is not None:
             if term not in self.terms:
                 raise ValueError(f"{term} is not a valid term type.")
-            return self._get_prior(self.terms[term])
+            prior = self._get_term(term)
         elif family is not None:
             if family not in self.families:
                 raise ValueError(f"{family} is not a valid family name.")
-            _f = self.families[family]
-            prior = self._get_prior(_f["dist"])
-            return Family(family, prior, _f["link"], _f["parent"])
+            prior = self._get_family(family)
+
+        return prior
+
+
+# When using family, we have a distribution for the response, and another distribution
+# for at least one argument of the response.
diff --git a/bambi/priors/scaler_default.py b/bambi/priors/scaler_default.py
new file mode 100644
index 000000000..a6f6b2b68
--- /dev/null
+++ b/bambi/priors/scaler_default.py
@@ -0,0 +1,103 @@
+import numpy as np
+
+from .priors import Prior
+
+
+class PriorScaler:
+    """Scale prior distributions parameters."""
+
+    # Standard deviation multiplier.
+ STD = 2.5 + + def __init__(self, model): + self.model = model + self.has_intercept = any( + term.type == "intercept" for term in self.model.common_terms.values() + ) + self.priors = {} + + # Compute mean and std of the response + if self.model.family.name == "gaussian": + self.response_mean = np.mean(model.response.data) + self.response_std = np.std(self.model.response.data) + else: + self.response_mean = 0 + self.response_std = 1 + + def get_intercept_stats(self): + mu = self.response_mean + sigma = self.STD * self.response_std + return mu, sigma + + def get_slope_sigma(self, x): + return self.STD * (self.response_std / np.std(x)) + + def scale_response(self): + # Add cases for other families + if self.model.response.prior.auto_scale: + if self.model.family.name == "gaussian": + sigma = self.response_std + self.model.response.prior.update(sigma=Prior("HalfStudentT", nu=4, sigma=sigma)) + + def scale_intercept(self, term): + if term.prior.name != "Normal": + return + mu, sigma = self.get_intercept_stats() + term.prior.update(mu=mu, sigma=sigma) + + def scale_common(self, term): + if term.prior.name != "Normal": + return + + # As many zeros as columns in the data. 
It can be greater than 1 for categorical variables + mu = np.zeros(term.data.shape[1]) + sigma = np.zeros(term.data.shape[1]) + + # Iterate over columns in the data + for i, x in enumerate(term.data.T): + sigma[i] = self.get_slope_sigma(x) + + # Save and set prior + self.priors.update({term.name: {"mu": mu, "sigma": sigma}}) + term.prior.update(mu=mu, sigma=sigma) + + def scale_group_specific(self, term): + if term.prior.args["sigma"].name != "HalfNormal": + return + + # Recreate the corresponding common effect data + data_as_common = term.predictor + + # Handle intercepts + if term.type == "intercept": + _, sigma = self.get_intercept_stats() + # Handle slopes + else: + sigma = np.zeros(data_as_common.shape[1]) + for i, x in enumerate(data_as_common.T): + sigma[i] = self.get_slope_sigma(x) + + term.prior.args["sigma"].update(sigma=np.squeeze(np.atleast_1d(sigma))) + + def scale(self): + # Scale response + self.scale_response() + + # Scale intercept + if self.has_intercept: + term = [t for t in self.model.common_terms.values() if t.type == "intercept"][0] + if term.prior.auto_scale: + self.scale_intercept(term) + + # Scale common terms + for term in self.model.common_terms.values(): + # Note: Intercept will be separated from common intercepts in the future. 
+ if term.type == "intercept": + continue + if term.prior.auto_scale: + self.scale_common(term) + + # Scale group-specific terms + for term in self.model.group_specific_terms.values(): + if term.prior.auto_scale: + self.scale_group_specific(term) diff --git a/bambi/priors/scaler.py b/bambi/priors/scaler_mle.py similarity index 94% rename from bambi/priors/scaler.py rename to bambi/priors/scaler_mle.py index 9deda138d..993daa8f0 100644 --- a/bambi/priors/scaler.py +++ b/bambi/priors/scaler_mle.py @@ -10,11 +10,13 @@ from statsmodels.genmod.generalized_linear_model import GLM from statsmodels.tools.sm_exceptions import PerfectSeparationError +from .priors import Prior -class PriorScaler: + +class PriorScalerMLE: """Scale prior distributions parameters. - Used internally. + Used internally. Based on https://arxiv.org/abs/1702.01201 """ # Default is 'wide'. The wide prior sigma is sqrt(1/3) = .577 on the partial @@ -32,7 +34,7 @@ def __init__(self, model, taylor): else: self.dm = pd.DataFrame() - self.has_intercept = any(term == "Intercept" for term in self.model.terms) + self.has_intercept = any(term.type == "intercept" for term in self.model.terms.values()) self.priors = {} self.mle = None @@ -109,6 +111,13 @@ def get_slope_stats(self, exog, name, values, sigma_corr, points=4, full_model=N ] return np.array(terms).sum() ** 0.5 + def scale_response(self): + if self.model.response.prior.auto_scale: + if self.model.family.name == "gaussian": + sigma = np.std(self.model.response.data) + self.model.response.prior.update(sigma=Prior("HalfStudentT", nu=4, sigma=sigma)) + # Add cases for other families + def scale_common(self, term): """Scale common terms, excluding intercepts.""" # Defaults are only defined for Normal priors @@ -215,24 +224,27 @@ def scale(self): + ["group_specific"] * len(group_specific) ) + # Scale response + self.scale_response() + # Initialize terms in order for term, term_type in zip(terms, term_types): # Only scale priors if term or model is set to be 
auto scaled.
             # By default, use "wide".
+            if not term.prior.auto_scale:
+                continue
+
             if term.prior.scale is None:
-                # pylint: disable=protected-access
-                if not (term.prior._auto_scale and self.model.auto_scale):
-                    continue
                 term.prior.scale = "wide"
 
-            if self.mle is None:
-                self.fit_mle()
-
             # Convert scale names to floats
             if isinstance(term.prior.scale, str):
                 term.prior.scale = self.names[term.prior.scale]
 
-            # Scale it
+            if self.mle is None:
+                self.fit_mle()
+
+            # Scale term with the appropriate method
             getattr(self, f"scale_{term_type}")(term)
 
     def fit_mle(self):
diff --git a/bambi/terms.py b/bambi/terms.py
index 12169dd8a..d8042b0d4 100644
--- a/bambi/terms.py
+++ b/bambi/terms.py
@@ -63,6 +63,13 @@ def __init__(self, name, term_dict, data, prior=None):
         else:
             self.categorical = self.type == "categoric"
 
+        # Flag constant terms
+        if self.categorical and len(term_dict["levels"]) == 1 and (data == data[0]).all():
+            raise ValueError(f"The term '{name}' has only 1 category!")
+
+        if not self.categorical and self.type != "intercept" and np.all(data == data[0]):
+            raise ValueError(f"The term '{name}' is constant!")
+
         # Flag cell-means terms (i.e., full-rank coding), which receive special priors
         # To flag intercepts we use `self.type`
         self.is_cell_means = self.categorical and (self.data.sum(1) == 1).all()
@@ -119,9 +126,10 @@ def __init__(self, name, term, data, prior=None):
         else:
             self.categorical = self.type == "categoric"
 
-        # Determine if it is cell means
+        # Determine if the term represents cell-means encoding.
         self.is_cell_means = self.categorical and (self.data.sum(1) == 1).all()
 
+        # Used in pymc3 model coords to label coordinates appropriately
         self.pymc_coords = {}
 
         # Group is always a coordinate added to the model.
expr, factor = self.name.split("|") diff --git a/bambi/tests/data/sample_priors.json b/bambi/tests/data/sample_priors.json index 685bbfbaf..3af7a9a00 100644 --- a/bambi/tests/data/sample_priors.json +++ b/bambi/tests/data/sample_priors.json @@ -1,47 +1,51 @@ { "terms": { - "yellow": "#swiss", - "white": "#feta" + "yellow": { + "dist": "#swiss" + }, + "white": { + "dist": "#feta" + } }, "families": { "hard": { - "dist": [ - "#asiago", - { + "dist": { + "dist": "#asiago", + "args": { "backup": "#parmesan" } - ], + }, "link": "grate", "parent": "wedge" } }, "dists": { - "swiss": [ - "Swiss", - { + "swiss": { + "name": "Swiss", + "args": { "holes": 100, "flavor": 100 } - ], - "feta": [ - "Feta", - { + }, + "feta": { + "name": "Feta", + "args": { "holes": 0, "hardness": -5 } - ], - "asiago": [ - "Asiago", - { + }, + "asiago": { + "name": "Asiago", + "args": { "hardness": 10, "backup": 0 } - ], - "parmesan": [ - "Parmesan", - { + }, + "parmesan": { + "name": "Parmesan", + "args": { "flavor": 10000 } - ] + } } } \ No newline at end of file diff --git a/bambi/tests/test_built_models.py b/bambi/tests/test_built_models.py index a0bd708dc..74384a77e 100644 --- a/bambi/tests/test_built_models.py +++ b/bambi/tests/test_built_models.py @@ -98,7 +98,7 @@ def test_many_common_many_group_specific(crossed_data): crossed_data_missing.loc[1, "continuous"] = np.nan crossed_data_missing.loc[2, "threecats"] = np.nan - # Here I'm comparing implicit/explicit intercepts for group specific effects workY the same way. + # Here I'm comparing implicit/explicit intercepts for group specific effects work the same way. 
model0 = Model( "Y ~ continuous + dummy + threecats + (threecats|subj) + (1|item) + (0+continuous|item) + (dummy|item) + (threecats|site)", crossed_data_missing, @@ -272,6 +272,12 @@ def test_cell_means_with_many_group_specific_effects(crossed_data): assert set(priors0) == set(priors1) +def test_group_specific_categorical_interaction(crossed_data): + crossed_data["fourcats"] = sum([[x] * 10 for x in ["a", "b", "c", "d"]], list()) * 3 + model = Model("Y ~ continuous + (threecats:fourcats|site)", crossed_data) + model.fit(tune=10, draws=10) + + @pytest.mark.skip(reason="We are correctly handling string links only, not functions.") def test_logistic_regression(crossed_data): # Tests passing link="logit" is equivalent to using tt.nnet.sigmoid @@ -422,7 +428,13 @@ def test_laplace(): def test_prior_predictive(crossed_data): crossed_data["count"] = (crossed_data["Y"] - crossed_data["Y"].min()).round() - model = Model("count ~ threecats + continuous + dummy", crossed_data, family="poisson") + # New default priors are too wide for this case... something to keep investigating + model = Model( + "count ~ threecats + continuous + dummy", + crossed_data, + family="poisson", + automatic_priors="mle", + ) model.fit(tune=0, draws=2) pps = model.prior_predictive(draws=500) @@ -432,7 +444,7 @@ def test_prior_predictive(crossed_data): for key, shape in zip(keys, shapes): assert pps.prior[key].shape == shape - assert pps.prior_predictive["count"].shape == (1, 500, 120) + assert pps.prior_predictive["count"].shape == (500, 120) assert pps.observed_data["count"].shape == (120,) pps = model.prior_predictive(draws=500, var_names=["count"]) @@ -475,3 +487,21 @@ def test_gamma_regression(dm): data = dm[["order", "ind_mg_dry"]] model = Model("ind_mg_dry ~ order", data, family="gamma", link="log") model.fit() + + +def test_plot_priors(crossed_data): + model = Model("Y ~ 0 + threecats", crossed_data) + # Priors cannot be plotted until model is built. 
+ with pytest.raises(ValueError): + model.plot_priors() + model.build() + model.plot_priors() + + +def test_model_graph(crossed_data): + model = Model("Y ~ 0 + threecats", crossed_data) + # Graph cannot be plotted until model is built. + with pytest.raises(ValueError): + model.graph() + model.build() + model.graph() diff --git a/bambi/tests/test_model_construction.py b/bambi/tests/test_model_construction.py index 25c45ed4b..2a2fd9d6c 100644 --- a/bambi/tests/test_model_construction.py +++ b/bambi/tests/test_model_construction.py @@ -15,6 +15,17 @@ from bambi.utils import link_match_family +@pytest.fixture(scope="module") +def data_numeric_xy(): + data = pd.DataFrame( + { + "y": np.random.normal(size=100), + "x": np.random.normal(size=100), + } + ) + return data + + @pytest.fixture(scope="module") def diabetes_data(): data_dir = join(dirname(__file__), "data") @@ -102,23 +113,55 @@ def test_model_init_from_filename(): assert "BMI" in model.data.columns +def test_model_init_bad_data(): + with pytest.raises(ValueError): + Model("y ~ x", {"x": 1}) + + +def test_model_categorical_argument(): + data = pd.DataFrame( + { + "y": np.random.normal(size=100), + "x": np.random.randint(2, size=100), + "z": np.random.randint(2, size=100), + } + ) + model = Model("y ~ 0 + x", data, categorical="x") + assert model.terms["x"].categorical + + model = Model("y ~ 0 + x*z", data, categorical=["x", "z"]) + assert model.terms["x"].categorical + assert model.terms["z"].categorical + assert model.terms["x:z"].categorical + + +def test_model_no_response(): + with pytest.raises(ValueError): + Model("x", pd.DataFrame({"x": [1]})) + + +def test_model_taylor_value(data_numeric_xy): + Model("y ~ x", data=data_numeric_xy, taylor=5) + + +def test_model_alternative_scaler(data_numeric_xy): + Model("y ~ x", data=data_numeric_xy, automatic_priors="mle") + + def test_model_term_names_property(diabetes_data): model = Model("BMI ~ age_grp + BP + S1", diabetes_data) - model.build() assert model.term_names 
== ["Intercept", "age_grp", "BP", "S1"] def test_model_term_names_property_interaction(crossed_data): crossed_data["fourcats"] = sum([[x] * 10 for x in ["a", "b", "c", "d"]], list()) * 3 model = Model("Y ~ threecats*fourcats", crossed_data) - model.build() assert model.term_names == ["Intercept", "threecats", "fourcats", "threecats:fourcats"] def test_model_terms_levels_interaction(crossed_data): crossed_data["fourcats"] = sum([[x] * 10 for x in ["a", "b", "c", "d"]], list()) * 3 model = Model("Y ~ threecats*fourcats", crossed_data) - model.build() assert model.terms["threecats:fourcats"].levels == [ "threecats[b]:fourcats[b]", @@ -141,7 +184,6 @@ def test_model_terms_levels(): } ) model = Model("y ~ x + z + time + (time|subject)", data) - model.build() assert model.terms["z"].levels == ["z[Group 2]", "z[Group 3]"] assert model.terms["1|subject"].groups == [f"Subject {x}" for x in range(1, 6)] assert model.terms["time|subject"].groups == [f"Subject {x}" for x in range(1, 6)] @@ -158,7 +200,6 @@ def test_model_term_classes(): ) model = Model("y ~ x*g + (x|s)", data) - model.build() assert isinstance(model.terms["x"], Term) assert isinstance(model.terms["g"], Term) @@ -178,27 +219,6 @@ def test_one_shot_formula_fit(diabetes_data): assert len(set(named_vars.keys()) & set(targets)) == 3 -@pytest.mark.skip(reason="Derived term search is going to be removed.") -def test_derived_term_search(diabetes_data): - model = Model("BMI ~ 1 + (age_grp|BP)", diabetes_data, categorical=["age_grp"]) - model.build() - terms = model._match_derived_terms("age_grp|BP") - names = set([t.name for t in terms]) - - # Since intercept is present, it uses treatment encoding - lvls = sorted(list(diabetes_data["age_grp"].unique()))[1:] - assert names == set(["1|BP"] + [f"age_grp[{lvl}]|BP" for lvl in lvls]) - - term = model._match_derived_terms("1|BP")[0] - assert term.name == "1|BP" - - # All of these should find nothing - assert model._match_derived_terms("1|ZZZ") is None - assert 
model._match_derived_terms("ZZZ|BP") is None - assert model._match_derived_terms("BP") is None - assert model._match_derived_terms("BP") is None - - def test_categorical_term(): data = pd.DataFrame( { @@ -436,3 +456,19 @@ def test_link_match_family(): for family in custom_families: assert link_match_family("anything", family) + + +def test_constant_terms(): + data = pd.DataFrame( + { + "y": np.random.normal(size=10), + "x": np.random.choice([1], size=10), + "z": np.random.choice(["A"], size=10), + } + ) + + with pytest.raises(ValueError): + Model("y ~ 0 + x", data) + + with pytest.raises(ValueError): + Model("y ~ 0 + z", data) diff --git a/bambi/tests/test_priors.py b/bambi/tests/test_priors.py index 1f897dc5d..197927010 100644 --- a/bambi/tests/test_priors.py +++ b/bambi/tests/test_priors.py @@ -1,4 +1,5 @@ import json +from multiprocessing.sharedctypes import Value from os.path import dirname, join import numpy as np @@ -47,6 +48,17 @@ def test_prior_factory_init_from_default_config(): assert "gaussian" in pf.families +def test_prior_factory_get_fail(): + # .get() must receive only, and only one, non None argument. 
+ pf = PriorFactory() + with pytest.raises(ValueError): + assert pf.get(dist="ñam", term="fri", family="frufi") + with pytest.raises(ValueError): + assert pf.get(dist="fali", term="fru") + with pytest.raises(ValueError): + assert pf.get() + + def test_prior_factory_init_from_config(): config_file = join(dirname(__file__), "data", "sample_priors.json") pf = PriorFactory(config_file) @@ -101,7 +113,6 @@ def test_auto_scale(diabetes_data): # By default, should scale everything except custom Prior() objects priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)} model = Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors) - model.build(backend="pymc3") p1 = model.terms["S1"].prior p2 = model.terms["S2"].prior p3 = model.terms["BP"].prior @@ -111,29 +122,130 @@ def test_auto_scale(diabetes_data): assert p3.name == "Cauchy" assert p3.args["beta"] == 17.5 - # With auto_scale off, everything should be flat unless explicitly named in priors + # With auto_scale off, custom priors are considered, but not custom scaling. + # Prior has no effect, and prior for BP has effect. 
+ priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)} model = Model("BMI ~ S1 + S2 + BP", diabetes_data, priors=priors, auto_scale=False) - model.build(backend="pymc3") p1_off = model.terms["S1"].prior p2_off = model.terms["S2"].prior p3_off = model.terms["BP"].prior assert p1_off.name == "Normal" assert p2_off.name == "Flat" - assert 0 < p1_off.args["sigma"] < 1 + assert p1_off.args["sigma"] == 1 assert "sigma" not in p2_off.args assert p3_off.name == "Cauchy" - assert p3_off.args["beta"] == 17.5 + + +def test_prior_str(): + # Tests __str__ method + prior1 = Prior("Normal", mu=0, sigma=1) + prior2 = Prior("Normal", mu=0, sigma=Prior("HalfNormal", sigma=1)) + assert str(prior1) == "Normal(mu: 0, sigma: 1)" + assert str(prior2) == "Normal(mu: 0, sigma: HalfNormal(sigma: 1))" + assert str(prior1) == repr(prior1) + + +def test_prior_eq(): + # Tests __eq__ method + prior1 = Prior("Normal", mu=0, sigma=1) + prior2 = Prior("Normal", mu=0, sigma=Prior("HalfNormal", sigma=1)) + assert prior1 == prior1 + assert prior2 == prior2 + assert prior1 != prior2 + assert prior1 != "Prior" + + +def test_family_unsupported(): + family = Family("name", "prior", "link", "parent") + with pytest.raises(ValueError): + family._set_link("Empty") + + +def test_family_bad_type(): + data = pd.DataFrame({"x": [1], "y": [1]}) + + with pytest.raises(ValueError): + Model("y ~ x", data, family=0) + + with pytest.raises(ValueError): + Model("y ~ x", data, family=set("gaussian")) + + with pytest.raises(ValueError): + Model("y ~ x", data, family={"family": "gaussian"}) + + +def test_family_unsupported_index_notation(): + data = pd.DataFrame({"x": [1], "y": [1]}) + with pytest.raises(ValueError): + Model("y[1] ~ x", data, family="gaussian") def test_complete_separation(): data = pd.DataFrame({"y": [0] * 5 + [1] * 5, "g": ["a"] * 5 + ["b"] * 5}) with pytest.raises(PerfectSeparationError): - Model("y ~ g", data, family="bernoulli").fit() + Model("y ~ g", data, family="bernoulli", 
automatic_priors="mle") # No error is raised priors = {"common": Prior("Normal", mu=0, sigma=10)} - Model("y ~ g", data, family="bernoulli", priors=priors).fit() + Model("y ~ g", data, family="bernoulli", priors=priors) + + +def test_set_priors(): + data = pd.DataFrame( + { + "y": np.random.normal(size=100), + "x": np.random.normal(size=100), + "g": np.random.choice(["A", "B"], size=100), + } + ) + model = Model("y ~ x + (1|g)", data) + prior = Prior("Uniform", lower=0, upper=50) + + # Common + model.set_priors(common=prior) + assert model.terms["Intercept"].prior == prior + assert model.terms["x"].prior == prior + + # Group-specific + model.set_priors(group_specific=prior) + assert model.terms["1|g"].prior == prior + + # By name + model = Model("y ~ x + (1|g)", data) + model.set_priors(priors={"x": prior}) + model.set_priors(priors={"1|g": prior}) + assert model.terms["x"].prior == prior + assert model.terms["1|g"].prior == prior + + +def test_set_prior_with_tuple(): + data = pd.DataFrame( + { + "y": np.random.normal(size=100), + "x": np.random.normal(size=100), + "z": np.random.normal(size=100), + } + ) + prior = Prior("Uniform", lower=0, upper=50) + model = Model("y ~ x + z", data) + model.set_priors(priors={("x", "z"): prior}) + + assert model.terms["x"].prior == prior + assert model.terms["z"].prior == prior + + +def test_set_prior_unexisting_term(): + data = pd.DataFrame( + { + "y": np.random.normal(size=100), + "x": np.random.normal(size=100), + } + ) + prior = Prior("Uniform", lower=0, upper=50) + model = Model("y ~ x", data) + with pytest.raises(ValueError): + model.set_priors(priors={("x", "z"): prior}) def test_response_prior(): diff --git a/bambi/utils.py b/bambi/utils.py index e71df5921..3456dd0bd 100644 --- a/bambi/utils.py +++ b/bambi/utils.py @@ -1,5 +1,16 @@ from .priors import Family +FAMILY_LINKS = { + "gaussian": ["identity", "log", "inverse"], + "gamma": ["identity", "log", "inverse"], + "bernoulli": ["identity", "logit", "probit", "cloglog"], 
+ "wald": ["inverse", "inverse_squared", "identity", "log"], + "negativebinomial": ["identity", "log", "cloglog"], + "poisson": ["identity", "log"], +} + +FAMILY_PARAMS = {"gaussian": "sigma", "negativebinomial": "alpha", "gamma": "alpha"} + def listify(obj): """Wrap all non-list or tuple objects in a list. @@ -13,28 +24,37 @@ def listify(obj): def extract_family_prior(family, priors): - # Given a family and a dictionary of priors, return the prior that could be - # passed to update the prior of a parameter in that family. - - # Only built-in gaussian and negativebinomial have nuisance parameters - # i.e, parameters not related to the link-transformed predicted outcome - if isinstance(family, str): - if family == "gaussian" and "sigma" in priors: - return {"sigma": priors["sigma"]} - elif family == "negativebinomial" and "alpha" in priors: - return {"alpha": priors["alpha"]} - elif family == "gamma" and "alpha" in priors: - return {"alpha": priors["alpha"]} + """Extract priors for a given family + + If a key in the priors dictionary matches the name of a nuisance parameter of the response + distribution for the given family, this function extracts and returns the prior for that + nuisance parameter. The result of this function can be safely used to update the ``Prior`` of + the response term. + + Parameters + ---------- + family: str or ``Family`` + The name of a built-in family or a ``Family`` instance. + priors: dict + A dictionary where keys represent parameter/term names and values represent + prior distributions. + """ + + if isinstance(family, str) and family in FAMILY_PARAMS: + name = FAMILY_PARAMS[family] + prior = priors.pop(name, None) + if prior: + return {name: prior} elif isinstance(family, Family): # Only work if there are nuisance parameters in the family, and if any of these nuisance # parameters is present in 'priors' dictionary. 
nuisance_params = [k for k in family.prior.args if k not in ["observed", family.parent]] if set(nuisance_params).intersection(set(priors)): - return {k: priors[k] for k in nuisance_params if k in priors} + return {k: priors.pop(k) for k in nuisance_params if k in priors} return None -def link_match_family(link, family_name): # pylint: disable= too-many-return-statements +def link_match_family(link, family_name): """Checks whether the a link can be used in a given family. When this function is used with built-in family names, it tests whether the link name can be @@ -42,17 +62,8 @@ def link_match_family(link, family_name): # pylint: disable= too-many-return-st the user is working with a custom ``Family`` object. Which links can work with which families are taken from statsmodels. """ - if family_name == "gaussian": - return link in ["identity", "log", "inverse"] - elif family_name == "gamma": - return link in ["identity", "log", "inverse"] - elif family_name == "bernoulli": - return link in ["identity", "logit", "probit", "cloglog"] - elif family_name == "wald": - return link in ["inverse", "inverse_squared", "identity", "log"] - elif family_name == "negativebinomial": - return link in ["identity", "log", "cloglog"] - elif family_name == "poisson": - return link in ["identity", "log"] - else: # Custom family, we don't know what link functions can be used - return True + if family_name in FAMILY_LINKS: + return link in FAMILY_LINKS[family_name] + + # Custom family, we don't know what link functions can be used + return True