From ba7aec15c86dc67c75419798994c1f88c4ccdc5d Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 29 Jun 2021 10:29:07 -0400 Subject: [PATCH 01/33] Start work on Observation class, trying to maintain backwards compatibility with obs dicts. --- prospect/utils/observation.py | 154 ++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 prospect/utils/observation.py diff --git a/prospect/utils/observation.py b/prospect/utils/observation.py new file mode 100644 index 00000000..02209de3 --- /dev/null +++ b/prospect/utils/observation.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- + +import json +import numpy as np + + +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, type): + return str(obj) + return json.JSONEncoder.default(self, obj) + + +class Observation: + + logify_spectrum = False + alias = {} + + def __init__(self, + flux=None, + uncertainty=None, + mask=slice(None), + **kwargs + ): + + self.flux = flux + self.uncertainty = uncertainty + self.mask = mask + self.from_oldstyle(**kwargs) + + def __getitem__(self, item): + """Dict-like interface for backwards compatibility + """ + k = self.alias.get(item, item) + return getattr(self, k) + + def from_oldstyle(self, **kwargs): + """Take an old-style obs dict and use it to populate the relevant + attributes. + """ + for k, v in self.alias.items(): + if k in kwargs: + setattr(self, v, kwargs[k]) + + def rectify(self): + """Make sure required attributes are present and have the appropriate + sizes. Also auto-masks non-finite data or negative uncertainties. + """ + assert self.wavelength.ndim == 1, "`wavelength` is not 1-d array" + assert self.ndata > 0, "no wavelength points supplied!" 
+ assert len(self.wavelength) == len(self.flux), "Flux array not same shape as wavelength" + assert len(self.wavelength) == len(self.uncertainty), "Uncertainty array not same shape as wavelength" + + # make mask array with automatic filters + marr = np.zeros(self.ndata, dtype=bool) + marr[self.mask] = True + self.mask = (marr & + (np.isfinite(self.flux)) & + (np.isfinite(self.uncertainty)) & + (self.uncertainty > 0)) + + assert self.ndof > 0, "No valid data to fit: check the sign of the masks." + + def render(self, wavelength, spectrum): + raise(NotImplementedError) + + @property + def ndof(self): + return int(self.mask.sum()) + + @property + def ndata(self): + if self.wavelength is None: + return 0 + else: + return len(self.wavelength) + + def serialize(self): + obs = vars(self) + serial = json.dumps(obs, cls=NumpyEncoder) + + +class Photometry(Observation): + + kind = "photometry" + alias = dict(maggies="flux", + maggies_unc="uncertainty", + filters="filters", + phot_mask="mask") + + def __init__(self, filters=[], **kwargs): + + super(Photometry, self).__init__(**kwargs) + self.filters = filters + + def render(self, wavelength, spectrum): + w, s = wavelength, spectrum + mags = [f.ab_mag(w, s, **self.render_kwargs) + for f in self.filters] + return 10**(-0.4 * np.array(mags)) + + @property + def wavelength(self): + return np.array([f.wave_effective for f in self.filters]) + + def to_oldstyle(self): + obs = vars(self) + obs.update({k: self[v] for k, v in self.alias.items()}) + _ = [obs.pop(k) for k in ["flux", "uncertainty", "mask"]] + obs["phot_wave"] = self.wavelength + return obs + + +class Spectrum(Observation): + + kind = "spectrum" + alias = dict(spectrum="flux", + unc="uncertainty", + wavelength="wavelength", + mask="mask") + + def __init__(self, + wavelength=None, + resolution=None, + calibration=None, + **kwargs): + + """ + :param resolution: (optional, default: None) + Instrumental resolution at each wavelength point in units of km/s + dispersion 
(:math:`= c \, \sigma_\lambda / \lambda = c \, \FWHM_\lambda / 2.355 / \lambda = c / (2.355 \, R_\lambda)` + where :math:`c=2.998e5 {\rm km}/{\rm s}` + + :param calibration: + not sure yet .... + """ + super(Spectrum, self).__init__(**kwargs) + self.wavelength = wavelength + self.resolution = resolution + self.calibration = calibration + + def render(self, wavelength, spectrum): + if self.ndata > 0: + wave = self.wavelength + spec = np.interp(wave, wavelength, spectrum) + return wave, spec + + def to_oldstyle(self): + obs = vars(self) + obs.update({k: self[v] for k, v in self.alias.items()}) + _ = [obs.pop(k) for k in ["flux", "uncertainty"]] + return obs From 501d9525f8cb62fc2ab3c9c19ce55488716b8c3d Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 1 Feb 2022 14:05:01 -0500 Subject: [PATCH 02/33] Working basic predictions with multiple spectra. Also add logic for caching emission line variances from MLE. --- prospect/models/sedmodel.py | 105 +++++++++++++++++++--------------- prospect/utils/observation.py | 14 +++++ tests/test_predict.py | 63 ++++++++++++++++++++ 3 files changed, 135 insertions(+), 47 deletions(-) create mode 100644 tests/test_predict.py diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index c41649c9..d92be6cc 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -41,6 +41,8 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.init_eline_info() + self.parse_elines() + def _available_parameters(self): new_pars = [("sigma_smooth", ""), ("marginalize_elines", ""), @@ -51,26 +53,24 @@ def _available_parameters(self): ("eline_sigma", ""), ("use_eline_priors", ""), ("eline_prior_width", "")] - relevant_pars = [("mass", ""), - ("lumdist", ""), - ("zred", ""), - ("nebemlineinspec", ""), - ("add_neb_emission")] + + referenced_pars = [("mass", ""), + ("lumdist", ""), + ("zred", ""), + ("nebemlineinspec", ""), + ("add_neb_emission")] return new_pars - def predict(self, theta, 
obs=None, sps=None, sigma_spec=None, **extras): + def predict(self, theta, obslist=None, sps=None, sigma_spec=None, **extras): """Given a ``theta`` vector, generate a spectrum, photometry, and any extras (e.g. stellar mass), including any calibration effects. :param theta: ndarray of parameter values, of shape ``(ndim,)`` - :param obs: - An observation dictionary, containing the output wavelength array, - the photometric filter lists, and the observed fluxes and - uncertainties thereon. Assumed to be the result of - :py:func:`utils.obsutils.rectify_obs` + :param obslist: + A list of `Observation` instances. :param sps: An `sps` object to be used in the model generation. It must have @@ -80,14 +80,17 @@ def predict(self, theta, obs=None, sps=None, sigma_spec=None, **extras): The covariance matrix for the spectral noise. It is only used for emission line marginalization. - :returns spec: - The model spectrum for these parameters, at the wavelengths - specified by ``obs['wavelength']``, including multiplication by the - calibration vector. Units of maggies + :returns predictions: (list of ndarrays) + List of predictions for the given list of observations. - :returns phot: - The model photometry for these parameters, for the filters - specified in ``obs['filters']``. Units of maggies. + If the observation kind is "spectrum" then this is the model spectrum for these + parameters, at the wavelengths specified by ``obs['wavelength']``, + including multiplication by the calibration vector. Units of + maggies + + If the observation kind is "photometry" then this is the model + photometry for these parameters, for the filters specified in + ``obs['filters']``. Units of maggies. :returns extras: Any extra aspects of the model that are returned. 
Typically this @@ -103,15 +106,28 @@ def predict(self, theta, obs=None, sps=None, sigma_spec=None, **extras): # Flux normalize self._norm_spec = self._spec * self.flux_norm() - # generate spectrum and photometry for likelihood - # predict_spec should be called before predict_phot - # because in principle it can modify the emission line parameters - # and also needs some things done in 'cache_eline_parameters` - # especially _ewave_obs and _use_elines - spec = self.predict_spec(obs, sigma_spec=sigma_spec) - phot = self.predict_phot(obs.get('filters', None)) + # cache eline observed wavelengths + eline_z = self.params.get("eline_delta_zred", 0.0) + self._ewave_obs = (1 + eline_z + self._zred) * self._eline_wave + # cache eline mle info + self._ln_eline_penalty = 0 + self._eline_lum_var = np.zeros_like(self._eline_wave) + + # generate predictions for likelihood + # this assumes all spectral datasets (if present) occur first + # because they can change the line strengths during marginalization. + predictions = [self.predict_one(obs, sigma_spec=sigma_spec) + for obs in obslist] - return spec, phot, self._mfrac + return predictions, self._mfrac + + def predict_one(self, obs, sigma_spec=None): + self.cache_eline_parameters(obs) + if obs.kind == "spectrum": + prediction = self.predict_spec(obs, sigma_spec) + elif obs.kind == "photometry": + prediction = self.predict_phot(obs["filters"]) + return prediction def predict_spec(self, obs, sigma_spec=None, **extras): """Generate a prediction for the observed spectrum. This method assumes @@ -139,10 +155,9 @@ def predict_spec(self, obs, sigma_spec=None, **extras): ``cache_eline_parameters()`` and ``fit_el()`` for details.) :param obs: - An observation dictionary, containing the output wavelength array, - the photometric filter lists, and the observed fluxes and - uncertainties thereon. 
Assumed to be the result of - :py:meth:`utils.obsutils.rectify_obs` + An instance of `Spectrum`, containing the output wavelength array, + the observed fluxes and uncertainties thereon. Assumed to be the + result of :py:meth:`utils.obsutils.rectify_obs` :param sigma_spec: (optional) The covariance matrix for the spectral noise. It is only used for @@ -160,9 +175,6 @@ def predict_spec(self, obs, sigma_spec=None, **extras): if self._outwave is None: self._outwave = obs_wave - # --- cache eline parameters --- - self.cache_eline_parameters(obs) - # --- smooth and put on output wavelength grid --- smooth_spec = self.smoothspec(obs_wave, self._norm_spec) @@ -340,29 +352,21 @@ def cache_eline_parameters(self, obs, nsigma=5, forcelines=False): redshift - first looks for ``eline_delta_zred``, and defaults to ``zred`` sigma - first looks for ``eline_sigma``, defaults to 100 km/s + N.B. This must be run separately for each `Observation` instance at each + likelihood call!!! + + param + :param nsigma: (float, optional, default: 5.) Number of sigma from a line center to use for defining which lines to fit and useful spectral elements for the fitting. float. """ - - # observed wavelengths - eline_z = self.params.get("eline_delta_zred", 0.0) - self._ewave_obs = (1 + eline_z + self._zred) * self._eline_wave - self._eline_lum_var = np.zeros_like(self._eline_wave) - - # masks for lines to be treated in various ways. - # always run this becuase it's need for spec *and* phot if adding lines - # by hand - self.parse_elines() - # exit gracefully if not adding lines. 
We also exit if only fitting # photometry, for performance reasons hasspec = obs.get('spectrum', None) is not None if not (self._want_lines & self._need_lines & hasspec): - self._fit_eline = None self._fit_eline_pixelmask = np.array([], dtype=bool) self._fix_eline_pixelmask = np.array([], dtype=bool) - self._fix_eline = None return # observed linewidths @@ -497,6 +501,7 @@ def fit_el(self, obs, calibrated_spec, sigma_spec=None): K = ln_mvn(alpha_hat, mean=alpha_hat, cov=sigma_alpha_hat) # Cache the ln-penalty + # FIXME this needs to be acumulated if there are multiple spectra self._ln_eline_penalty = K # Store fitted emission line luminosities in physical units @@ -637,7 +642,10 @@ def absolute_rest_maggies(self, filters): def mean_model(self, theta, obs, sps=None, sigma=None, **extras): """Legacy wrapper around predict() """ - return self.predict(theta, obs, sps=sps, sigma_spec=sigma, **extras) + from ..utils.observation import from_oldstyle + obslist = from_oldstyle(obs) + predictions, mfrac = self.predict(theta, obslist, sps=sps, sigma_spec=sigma, **extras) + return predictions[0], predictions[1], mfrac class PolySpecModel(SpecModel): @@ -662,6 +670,9 @@ def spec_calibration(self, theta=None, obs=None, spec=None, **kwargs): spectrum, conditional on all other parameters. If emission lines are being marginalized out, they are excluded from the least-squares fit. + :param obs: + Instance of `Spectrum` + :returns cal: A polynomial given by :math:`\sum_{m=0}^M a_{m} * T_m(x)`. 
""" diff --git a/prospect/utils/observation.py b/prospect/utils/observation.py index 02209de3..da159e8c 100644 --- a/prospect/utils/observation.py +++ b/prospect/utils/observation.py @@ -3,6 +3,8 @@ import json import numpy as np +__all__ = ["Observation", "Spectrum", "Photometry", + "from_oldstyle"] class NumpyEncoder(json.JSONEncoder): def default(self, obj): @@ -36,6 +38,12 @@ def __getitem__(self, item): k = self.alias.get(item, item) return getattr(self, k) + def get(self, item, default): + try: + return self[item] + except(AttributeError): + return default + def from_oldstyle(self, **kwargs): """Take an old-style obs dict and use it to populate the relevant attributes. @@ -152,3 +160,9 @@ def to_oldstyle(self): obs.update({k: self[v] for k, v in self.alias.items()}) _ = [obs.pop(k) for k in ["flux", "uncertainty"]] return obs + + +def from_oldstyle(obs): + """Convert from an oldstyle dictionary to a list of observations + """ + return [Spectrum().from_oldstyle(obs), Photometry().from_oldstyle(obs)] \ No newline at end of file diff --git a/tests/test_predict.py b/tests/test_predict.py new file mode 100644 index 00000000..a14d6f20 --- /dev/null +++ b/tests/test_predict.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np + +from sedpy.observate import load_filters +from prospect.sources import CSPSpecBasis +from prospect.models import SpecModel, templates +from prospect.utils.observation import Spectrum, Photometry + + +def build_model(): + model_params = templates.TemplateLibrary["parametric_sfh"] + return SpecModel(model_params) + + +def build_obs(multispec=True): + N = 1500 * (2 - multispec) + wmax = 7000 + wsplit = wmax - N * multispec + + filterlist = load_filters([f"sdss_{b}0" for b in "ugriz"]) + Nf = len(filterlist) + phot = [Photometry(filters=filterlist, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)] + spec = [Spectrum(wavelength=np.linspace(4000, wsplit, N), + flux=np.ones(N), uncertainty=np.ones(N) / 10, + 
mask=slice(None))] + + if multispec: + spec += [Spectrum(wavelength=np.linspace(wsplit+1, wmax, N), + flux=np.ones(N), uncertainty=np.ones(N) / 10, + mask=slice(None))] + + obslist = spec + phot + [obs.rectify() for obs in obslist] + return obslist + + +def build_sps(): + sps = CSPSpecBasis(zcontinuous=1) + return sps + + +if __name__ == "__main__": + obslist_single = build_obs(multispec=False) + obslist = build_obs() + model = build_model() + sps = build_sps() + + #sys.exit() + predictions_single, mfrac = model.predict(model.theta, obslist=obslist_single, sps=sps) + #sys.exit() + predictions, mfrac = model.predict(model.theta, obslist=obslist, sps=sps) + + import matplotlib.pyplot as pl + fig, ax = pl.subplots() + ax.plot(obslist_single[0].wavelength, predictions_single[0]) + for p, o in zip(predictions, obslist): + if o.kind == "photometry": + ax.plot(o.wavelength, p, "o") + else: + ax.plot(o.wavelength, p) + From dc3c71c095e789e468eb517810f0fb64232a3806 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Fri, 29 Apr 2022 10:58:28 -0400 Subject: [PATCH 03/33] Move noise models to observation objects. Offloads likelihood computations to NoiseModel objects that are attached to individual Observation instances. Also updates lnprobfn to handle lists of observations, and adds some docstrings and rough tests. fitting ubdates for observation lists; dosctring modernization. 
--- prospect/fitting/fitting.py | 343 +++++++++++++---------------- prospect/likelihood/likelihood.py | 225 +++---------------- prospect/likelihood/noise_model.py | 140 ++++++++++-- prospect/models/sedmodel.py | 73 +++--- prospect/utils/observation.py | 47 +++- tests/test_predict.py | 28 ++- 6 files changed, 390 insertions(+), 466 deletions(-) diff --git a/prospect/fitting/fitting.py b/prospect/fitting/fitting.py index dfc19a7e..e14ee930 100755 --- a/prospect/fitting/fitting.py +++ b/prospect/fitting/fitting.py @@ -16,8 +16,7 @@ from .minimizer import minimize_wrapper, minimizer_ball from .ensemble import run_emcee_sampler from .nested import run_dynesty_sampler -from ..likelihood import lnlike_spec, lnlike_phot, chi_spec, chi_phot, write_log -from ..utils.obsutils import fix_obs +from ..likelihood.likelihood import compute_chi, compute_lnlike __all__ = ["lnprobfn", "fit_model", @@ -25,67 +24,60 @@ ] -def lnprobfn(theta, model=None, obs=None, sps=None, noise=(None, None), +<<<<<<< HEAD +def lnprobfn(theta, model=None, observations=None, sps=None, noises=None, residuals=False, nested=False, negative=False, verbose=False): +======= +def lnprobfn(theta, model=None, observations=None, sps=None, + residuals=False, nested=False, verbose=False): +>>>>>>> 5617c8c (fitting ubdates for observation lists; dosctring modernization.) """Given a parameter vector and optionally a dictionary of observational ata and a model object, return the matural log of the posterior. This requires that an sps object (and if using spectra and gaussian processes, a NoiseModel) be instantiated. - :param theta: - Input parameter vector, ndarray of shape (ndim,) - - :param model: - SedModel model object, with attributes including ``params``, a - dictionary of model parameter state. It must also have - :py:func:`prior_product`, and :py:func:`predict` methods - defined. - - :param obs: - A dictionary of observational data. 
The keys should be - - + ``"wavelength"`` (angstroms) - + ``"spectrum"`` (maggies) - + ``"unc"`` (maggies) - + ``"maggies"`` (photometry in maggies) - + ``"maggies_unc"`` (photometry uncertainty in maggies) - + ``"filters"`` (:py:class:`sedpy.observate.FilterSet` or iterable of :py:class:`sedpy.observate.Filter`) - + and optional spectroscopic ``"mask"`` and ``"phot_mask"`` (same - length as ``spectrum`` and ``maggies`` respectively, True means use - the data points) - - :param sps: - A :py:class:`prospect.sources.SSPBasis` object or subclass thereof, or - any object with a ``get_spectrum`` method that will take a dictionary - of model parameters and return a spectrum, photometry, and ancillary - information. - - :param noise: (optional, default: (None, None)) - A 2-element tuple of :py:class:`prospect.likelihood.NoiseModel` objects. - - :param residuals: (optional, default: False) + Parameters + ---------- + theta : ndarray of shape ``(ndim,)`` + Input parameter vector + + model : instance of the :py:class:`prospect.models.SedModel` + The model parameterization and parameter state. Must have + :py:meth:`predict()` defined + + observations : A list of :py:class:`observation.Observation` instances + The data to be fit. + + sps : instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. + The object used to construct the basic physical spectral model. + Anything with a compatible :py:func:`get_galaxy_spectrum` can + be used here. It will be passed to ``lnprobfn`` + + residuals : bool (optional, default: False) A switch to allow vectors of :math:`\chi` values to be returned instead of a scalar posterior probability. This can be useful for least-squares optimization methods. Note that prior probabilities are not included in this calculation. - :param nested: (optional, default: False) + nested : bool (optional, default: False) If ``True``, do not add the ln-prior probability to the ln-likelihood when computing the ln-posterior. 
For nested sampling algorithms the prior probability is incorporated in the way samples are drawn, so should not be included here. - :param negative: (optiona, default: False) + negative: bool (optional, default: False) If ``True`` return the negative on the ln-probability for minimization purposes. - :returns lnp: + Returns + ------- + lnp : float or ndarry of shape `(ndof,)` Ln-probability, unless ``residuals=True`` in which case a vector of :math:`\chi` values is returned. """ if residuals: - lnnull = np.zeros(obs["ndof"]) - 1e18 # np.infty - #lnnull = -np.infty + ndof = np.sum([obs["ndof"] for obs in observations]) + lnnull = np.zeros(ndof) - 1e18 # -np.infty else: lnnull = -np.infty @@ -94,25 +86,16 @@ def lnprobfn(theta, model=None, obs=None, sps=None, noise=(None, None), if not np.isfinite(lnp_prior): return lnnull - # --- Update Noise Model --- - spec_noise, phot_noise = noise - vectors, sigma_spec = {}, None + # set parameters model.set_parameters(theta) - if spec_noise is not None: - spec_noise.update(**model.params) - vectors.update({"unc": obs.get('unc', None)}) - sigma_spec = spec_noise.construct_covariance(**vectors) - if phot_noise is not None: - phot_noise.update(**model.params) - vectors.update({'phot_unc': obs.get('maggies_unc', None), - 'phot': obs.get('maggies', None), - 'filter_names': obs.get('filter_names', None)}) + + # --- Update Noise Model Parameters --- + [obs.noise.update(**model.params) for obs in observations + if obs.noise is not None] # --- Generate mean model --- try: - t1 = time.time() - spec, phot, x = model.predict(theta, obs, sps=sps, sigma_spec=sigma_spec) - d1 = time.time() - t1 + predictions, x = model.predict(theta, observations, sps=sps) except(ValueError): return lnnull except: @@ -122,78 +105,52 @@ def lnprobfn(theta, model=None, obs=None, sps=None, noise=(None, None), # --- Optionally return chi vectors for least-squares --- # note this does not include priors! 
if residuals: - chispec = chi_spec(spec, obs) - chiphot = chi_phot(phot, obs) - return np.concatenate([chispec, chiphot]) - - # --- Mixture Model --- - f_outlier_spec = model.params.get('f_outlier_spec', 0.0) - if (f_outlier_spec != 0.0): - sigma_outlier_spec = model.params.get('nsigma_outlier_spec', 10) - vectors.update({'nsigma_outlier_spec': sigma_outlier_spec}) - f_outlier_phot = model.params.get('f_outlier_phot', 0.0) - if (f_outlier_phot != 0.0): - sigma_outlier_phot = model.params.get('nsigma_outlier_phot', 10) - vectors.update({'nsigma_outlier_phot': sigma_outlier_phot}) + chi = [compute_chi(spec, obs) for pred, obs in zip(predictions, observations)] + return np.concatenate(chi) # --- Emission Lines --- + lnp_eline = getattr(model, "_ln_eline_penalty", 0.0) # --- Calculate likelihoods --- - t1 = time.time() - lnp_spec = lnlike_spec(spec, obs=obs, - f_outlier_spec=f_outlier_spec, - spec_noise=spec_noise, - **vectors) - lnp_phot = lnlike_phot(phot, obs=obs, - f_outlier_phot=f_outlier_phot, - phot_noise=phot_noise, **vectors) - lnp_eline = getattr(model, '_ln_eline_penalty', 0.0) - - d2 = time.time() - t1 - if verbose: - write_log(theta, lnp_prior, lnp_spec, lnp_phot, d1, d2) - - lnp = lnp_prior + lnp_phot + lnp_spec + lnp_eline + lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs + in zip(predictions, observations)] + + lnp = lnp_prior + np.sum(lnp_data) + lnp_eline if negative: lnp *= -1 return lnp -def wrap_lnp(lnpfn, obs, model, sps, **lnp_kwargs): - return argfix(lnpfn, obs=obs, model=model, sps=sps, +def wrap_lnp(lnpfn, observations, model, sps, **lnp_kwargs): + return argfix(lnpfn, observations=observations, model=model, sps=sps, **lnp_kwargs) -def fit_model(obs, model, sps, noise=(None, None), lnprobfn=lnprobfn, +def fit_model(observations, model, sps, lnprobfn=lnprobfn, optimize=False, emcee=False, dynesty=True, **kwargs): """Fit a model to observations using a number of different methods - :param obs: - The ``obs`` dictionary containing 
the data to fit to, which will be - passed to ``lnprobfn``. + Parameters + ---------- + observations : list of :py:class:`observate.Observation` instances + The data to be fit. - :param model: - An instance of the :py:class:`prospect.models.SedModel` class - containing the model parameterization and parameter state. It will be + model : instance of the :py:class:`prospect.models.SedModel` + The model parameterization and parameter state. It will be passed to ``lnprobfn``. - :param sps: - An instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. - Alternatively, anything with a compatible :py:func:`get_spectrum` can + sps : instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. + The object used to construct the basic physical spectral model. + Anything with a compatible :py:func:`get_galaxy_spectrum` can be used here. It will be passed to ``lnprobfn`` - :param noise: (optional, default: (None, None)) - A tuple of NoiseModel objects for the spectroscopy and photometry - respectively. Can also be (None, None) in which case simple chi-square - will be used. - - :param lnprobfn: (optional, default: lnprobfn) - A posterior probability function that can take ``obs``, ``model``, - ``sps``, and ``noise`` as keywords. By default use the + lnprobfn : callable (optional, default: :py:meth:`lnprobfn`) + A posterior probability function that can take ``observations``, + ``model``, and ``sps`` as keywords. By default use the :py:func:`lnprobfn` defined above. - :param optimize: (optional, default: False) + optimize : bool (optional, default: False) If ``True``, conduct a round of optimization before sampling from the posterior. The model state will be set to the best value at the end of optimization before continuing on to sampling or returning. Parameters @@ -207,7 +164,7 @@ def fit_model(obs, model, sps, noise=(None, None), lnprobfn=lnprobfn, See :py:func:`run_minimize` for details. 
- :param emcee: (optional, default: False) + emcee : bool (optional, default: False) If ``True``, sample from the posterior using emcee. Additonal parameters controlling emcee can be passed via ``**kwargs``. These include @@ -217,18 +174,20 @@ def fit_model(obs, model, sps, noise=(None, None), lnprobfn=lnprobfn, Many additional emcee parameters can be provided here, see :py:func:`run_emcee` for details. - :param dynesty: + dynesty : bool (optional, default: True) If ``True``, sample from the posterior using dynesty. Additonal parameters controlling dynesty can be passed via ``**kwargs``. See :py:func:`run_dynesty` for details. - :returns output: + Returns + ------- + output : dictionary A dictionary with two keys, ``"optimization"`` and ``"sampling"``. The value of each of these is a 2-tuple with results in the first element and durations (in seconds) in the second element. """ # Make sure obs has required keys - obs = fix_obs(obs) + [obs.rectify() for obs in observations] if emcee & dynesty: msg = ("Cannot run both emcee and dynesty fits " @@ -243,7 +202,7 @@ def fit_model(obs, model, sps, noise=(None, None), lnprobfn=lnprobfn, "sampling": (None, 0.)} if optimize: - optres, topt, best = run_minimize(obs, model, sps, noise, + optres, topt, best = run_minimize(observations, model, sps, lnprobfn=lnprobfn, **kwargs) # set to the best model.set_parameters(optres[best].x) @@ -256,63 +215,61 @@ def fit_model(obs, model, sps, noise=(None, None), lnprobfn=lnprobfn, else: return output - output["sampling"] = run_sampler(obs, model, sps, noise, + output["sampling"] = run_sampler(observations, model, sps, lnprobfn=lnprobfn, **kwargs) return output -def run_minimize(obs=None, model=None, sps=None, noise=None, lnprobfn=lnprobfn, +def run_minimize(observations=None, model=None, sps=None, lnprobfn=lnprobfn, min_method='lm', min_opts={}, nmin=1, pool=None, **extras): """Run a minimization. 
This wraps the lnprobfn fixing the ``obs``, ``model``, ``noise``, and ``sps`` objects, and then runs a minimization of -lnP using scipy.optimize methods. - :param obs: - The ``obs`` dictionary containing the data to fit to, which will be - passed to ``lnprobfn``. + Parameters + ---------- + observations : list of :py:class:`observate.Observation` instances + The data to be fit. - :param model: - An instance of the :py:class:`prospect.models.SedModel` class - containing the model parameterization and parameter state. It will be + model : instance of the :py:class:`prospect.models.SedModel` + The model parameterization and parameter state. It will be passed to ``lnprobfn``. - :param sps: - An instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. - Alternatively, anything with a compatible :py:func:`get_spectrum` can + sps : instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. + The object used to construct the basic physical spectral model. + Anything with a compatible :py:func:`get_galaxy_spectrum` can be used here. It will be passed to ``lnprobfn`` - :param noise: (optional) - If given, a tuple of :py:class:`NoiseModel` objects passed to - ``lnprobfn``. - - :param lnprobfn: (optional, default: lnprobfn) - A posterior probability function that can take ``obs``, ``model``, - ``sps``, and ``noise`` as keywords. By default use the + lnprobfn : callable (optional, default: :py:meth:`lnprobfn`) + A posterior probability function that can take ``observations``, + ``model``, and ``sps`` as keywords. By default use the :py:func:`lnprobfn` defined above. - :param min_method: (optional, default: 'lm') + min_method : string (optional, default: 'lm') Method to use for minimization * 'lm': Levenberg-Marquardt * 'powell': Powell line search method - :param nmin: (optional, default: 1) + nmin : int (optional, default: 1) Number of minimizations to do. Beyond the first, minimizations will be started from draws from the prior. 
- :param min_opts: (optional, default: {}) + min_opts : dict (optional, default: {}) Dictionary of minimization options passed to the scipy.optimize method. These include things like 'xtol', 'ftol', etc.. - :param pool: (optional, default: None) + pool : object (optional, default: None) A pool to use for parallel optimization from multiple initial positions. - :returns results: + Returns + ------- + results : A list of `scipy.optimize.OptimizeResult` objects. - :returns tm: + t_wall : float Wall time used for the minimization, in seconds. - :returns best: + best : int The index of the results list containing the lowest chi-square result. """ initial = model.theta.copy() @@ -330,8 +287,12 @@ def run_minimize(obs=None, model=None, sps=None, noise=None, lnprobfn=lnprobfn, residuals = False args = [] +<<<<<<< HEAD loss = argfix(lnprobfn, obs=obs, model=model, sps=sps, noise=noise, residuals=residuals, negative=True) +======= + loss = argfix(lnprobfn, observations=observations, model=model, sps=sps, residuals=residuals) +>>>>>>> 5617c8c (fitting ubdates for observation lists; dosctring modernization.) minimizer = minimize_wrapper(algorithm, loss, [], min_method, min_opts) qinit = minimizer_ball(initial, nmin, model) @@ -353,60 +314,54 @@ def run_minimize(obs=None, model=None, sps=None, noise=None, lnprobfn=lnprobfn, return results, tm, best -def run_emcee(obs, model, sps, noise, lnprobfn=lnprobfn, - hfile=None, initial_positions=None, - **kwargs): +def run_emcee(observations, model, sps, lnprobfn=lnprobfn, + hfile=None, initial_positions=None, **kwargs): """Run emcee, optionally including burn-in and convergence checking. Thin wrapper on :py:class:`prospect.fitting.ensemble.run_emcee_sampler` - :param obs: - The ``obs`` dictionary containing the data to fit to, which will be - passed to ``lnprobfn``. + Parameters + ---------- + observations : list of :py:class:`observate.Observation` instances + The data to be fit. 
- :param model: - An instance of the :py:class:`prospect.models.SedModel` class - containing the model parameterization and parameter state. It will be + model : instance of the :py:class:`prospect.models.SedModel` + The model parameterization and parameter state. It will be passed to ``lnprobfn``. - :param sps: - An instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. - Alternatively, anything with a compatible :py:func:`get_spectrum` can + sps : instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. + The object used to construct the basic physical spectral model. + Anything with a compatible :py:func:`get_galaxy_spectrum` can be used here. It will be passed to ``lnprobfn`` - :param noise: - A tuple of :py:class:`NoiseModel` objects passed to ``lnprobfn``. - - :param lnprobfn: (optional, default: lnprobfn) - A posterior probability function that can take ``obs``, ``model``, - ``sps``, and ``noise`` as keywords. By default use the + lnprobfn : callable (optional, default: :py:meth:`lnprobfn`) + A posterior probability function that can take ``observations``, + ``model``, and ``sps`` as keywords. By default use the :py:func:`lnprobfn` defined above. - :param hfile: (optional, default: None) + hfile : :py:class:`h5py.File()` instance (optional, default: None) A file handle for a :py:class:`h5py.File` object that will be written to incremantally during sampling. - :param initial_positions: (optional, default: None) - If given, a set of initial positions for the emcee walkers. Must have - shape (nwalkers, ndim). Rounds of burn-in will be skipped if this - parameter is present. + initial_positions : ndarray of shape ``(nwalkers, ndim)`` (optional, default: None) + If given, a set of initial positions for the emcee walkers. Rounds of + burn-in will be skipped if this parameter is present. Extra Parameters -------- - - :param nwalkers: + nwalkers : int The number of walkers to use. If None, use the nearest power of two to ``ndim * walker_factor``. 
- :param niter: + niter : int Number of iterations for the production run - :param nburn: + nburn : list of int List of the number of iterations to run in each round of burn-in (for removing stuck walkers.) E.g. `nburn=[32, 64]` will run the sampler for 32 iterations before reinitializing and then run the sampler for another 64 iterations before starting the production run. - :param storechain: (default: True) + storechain : bool (default: True) If using HDF5 output, setting this to False will keep the chain from being held in memory by the sampler object. @@ -432,19 +387,17 @@ def run_emcee(obs, model, sps, noise, lnprobfn=lnprobfn, Returns -------- - - :returns sampler: + sampler : An instance of :py:class:`emcee.EnsembleSampler`. - :returns ts: + t_wall : float Duration of sampling (including burn-in) in seconds of wall time. """ q = model.theta.copy() - postkwargs = {"obs": obs, + postkwargs = {"observations": observations, "model": model, "sps": sps, - "noise": noise, "nested": False, } @@ -467,62 +420,62 @@ def run_emcee(obs, model, sps, noise, lnprobfn=lnprobfn, return sampler, ts +<<<<<<< HEAD def run_dynesty(obs, model, sps, noise, lnprobfn=lnprobfn, pool=None, nested_target_n_effective=10000, **kwargs): +======= +def run_dynesty(obs, model, sps, lnprobfn=lnprobfn, + pool=None, nested_posterior_thresh=0.05, **kwargs): +>>>>>>> 5617c8c (fitting ubdates for observation lists; dosctring modernization.) """Thin wrapper on :py:class:`prospect.fitting.nested.run_dynesty_sampler` - :param obs: - The ``obs`` dictionary containing the data to fit to, which will be - passed to ``lnprobfn``. + Parameters + ---------- + observations : list of :py:class:`observate.Observation` instances + The data to be fit. - :param model: - An instance of the :py:class:`prospect.models.SedModel` class - containing the model parameterization and parameter state. It will be + model : instance of the :py:class:`prospect.models.SedModel` + The model parameterization and parameter state. 
It will be passed to ``lnprobfn``. - :param sps: - An instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. - Alternatively, anything with a compatible :py:func:`get_spectrum` can + sps : instance of a :py:class:`prospect.sources.SSPBasis` (sub-)class. + The object used to construct the basic physical spectral model. + Anything with a compatible :py:func:`get_galaxy_spectrum` can be used here. It will be passed to ``lnprobfn`` - :param noise: - A tuple of :py:class:`prospect.likelihood.NoiseModel` objects passed to - ``lnprobfn``. - - :param lnprobfn: (optional, default: :py:func:`lnprobfn`) - A posterior probability function that can take ``obs``, ``model``, - ``sps``, and ``noise`` as keywords. This function must also take a - ``nested`` keyword. + lnprobfn : callable (optional, default: :py:meth:`lnprobfn`) + A posterior probability function that can take ``observations``, + ``model``, and ``sps`` as keywords. By default use the + :py:func:`lnprobfn` defined above. Extra Parameters -------- - :param nested_bound: (optional, default: 'multi') + nested_bound: (optional, default: 'multi') - :param nested_sample: (optional, default: 'unif') + nested_sample: (optional, default: 'unif') - :param nested_nlive_init: (optional, default: 100) + nested_nlive_init: (optional, default: 100) - :param nested_nlive_batch: (optional, default: 100) + nested_nlive_batch: (optional, default: 100) - :param nested_dlogz_init: (optional, default: 0.02) + nested_dlogz_init: (optional, default: 0.02) - :param nested_maxcall: (optional, default: None) + nested_maxcall: (optional, default: None) - :param nested_walks: (optional, default: 25) + nested_walks: (optional, default: 25) Returns -------- - - :returns result: + result: An instance of :py:class:`dynesty.results.Results`. - :returns ts: + t_wall : float Duration of sampling in seconds of wall time. 
""" from dynesty.dynamicsampler import stopping_function, weight_function nested_stop_kwargs = {"target_n_effective": nested_target_n_effective} - lnp = wrap_lnp(lnprobfn, obs, model, sps, noise=noise, + lnp = wrap_lnp(lnprobfn, observations, model, sps, noise=noise, nested=True) # Need to deal with postkwargs... diff --git a/prospect/likelihood/likelihood.py b/prospect/likelihood/likelihood.py index 1b648cde..590723fe 100644 --- a/prospect/likelihood/likelihood.py +++ b/prospect/likelihood/likelihood.py @@ -1,217 +1,48 @@ +# -*- coding: utf-8 -*- + import time, sys, os import numpy as np from scipy.linalg import LinAlgError -__all__ = ["lnlike_spec", "lnlike_phot", "chi_spec", "chi_phot", "write_log"] - - -def lnlike_spec(spec_mu, obs=None, spec_noise=None, f_outlier_spec=0.0, **vectors): - """Calculate the likelihood of the spectroscopic data given the - spectroscopic model. Allows for the use of a gaussian process - covariance matrix for multiplicative residuals. - - :param spec_mu: - The mean model spectrum, in linear or logarithmic units, including - e.g. calibration and sky emission. - - :param obs: (optional) - A dictionary of the observational data, including the keys - *``spectrum`` a numpy array of the observed spectrum, in linear or - logarithmic units (same as ``spec_mu``). - *``unc`` the uncertainty of same length as ``spectrum`` - *``mask`` optional boolean array of same length as ``spectrum`` - *``wavelength`` if using a GP, the metric that is used in the - kernel generation, of same length as ``spectrum`` and typically - giving the wavelength array. +from .noise_model import NoiseModel - :param spec_noise: (optional) - A NoiseModel object with the methods `compute` and `lnlikelihood`. - If ``spec_noise`` is supplied, the `wavelength` entry in the obs - dictionary must exist. 
+__all__ = ["compute_lnlike", "compute_chi"] - :param f_outlier_spec: (optional) - The fraction of spectral pixels which are considered outliers - by the mixture model - :param vectors: (optional) - A dictionary of vectors of same length as ``wavelength`` giving - possible weghting functions for the kernels - - :returns lnlikelhood: - The natural logarithm of the likelihood of the data given the mean - model spectrum. - """ - if obs['spectrum'] is None: - return 0.0 +basic_noise_model = NoiseModel() - mask = obs.get('mask', slice(None)) - vectors['mask'] = mask - vectors['wavelength'] = obs['wavelength'] - delta = (obs['spectrum'] - spec_mu)[mask] - var = (obs['unc'][mask])**2 +def compute_lnlike(pred, obs, vectors={}): + """Calculate the likelihood of the observational data given the + prediction. This is a very thin wrapper on the noise model that should be + attached to each Observation instance. - if spec_noise is not None: - try: - spec_noise.compute(**vectors) - if (f_outlier_spec == 0.0): - return spec_noise.lnlikelihood(spec_mu[mask], obs['spectrum'][mask]) - - # disallow (correlated noise model + mixture model) - # and redefine errors - assert spec_noise.Sigma.ndim == 1 - var = spec_noise.Sigma - - except(LinAlgError): - return np.nan_to_num(-np.inf) - - lnp = -0.5*( (delta**2/var) + np.log(2*np.pi*var) ) - if (f_outlier_spec == 0.0): - return lnp.sum() - else: - var_bad = var * (vectors["nsigma_outlier_spec"]**2) - lnp_bad = -0.5*( (delta**2/var_bad) + np.log(2*np.pi*var_bad) ) - lnp_tot = np.logaddexp(lnp + np.log(1-f_outlier_spec), lnp_bad + np.log(f_outlier_spec)) - - return lnp_tot.sum() - - -def lnlike_phot(phot_mu, obs=None, phot_noise=None, f_outlier_phot=0.0, **vectors): - """Calculate the likelihood of the photometric data given the spectroscopic - model. Allows for the use of a gaussian process covariance matrix. - - :param phot_mu: - The mean model sed, in linear flux units (i.e. maggies). 
+ :param pred: + The predicted data, including calibration. :param obs: (optional) - A dictionary of the observational data, including the keys - *``maggies`` a numpy array of the observed SED, in linear flux - units - *``maggies_unc`` the uncertainty of same length as ``maggies`` - *``phot_mask`` optional boolean array of same length as - ``maggies`` - *``filters`` optional list of sedpy.observate.Filter objects, - necessary if using fixed filter groups with different gp - amplitudes for each group. - If not supplied then the obs dictionary given at initialization will - be used. - - :param phot_noise: (optional) - A ``prospect.likelihood.NoiseModel`` object with the methods - ``compute()`` and ``lnlikelihood()``. If not supplied a simple chi^2 - likelihood will be evaluated. - - :param f_outlier_phot: (optional) - The fraction of photometric bands which are considered outliers - by the mixture model + Instance of observation.Observation() or subclass thereof. - :param vectors: - A dictionary of possibly relevant vectors of same length as maggies - that will be passed to the NoiseModel object for constructing weighted - covariance matrices. - - :returns lnlikelhood: - The natural logarithm of the likelihood of the data given the mean - model spectrum. 
+ :param vectors: (optional) + A dictionary of vectors of same length as ``obs.wavelength`` giving + possible weghting functions for the kernels """ - if obs['maggies'] is None: - return 0.0 - - mask = obs.get('phot_mask', slice(None)) - delta = (obs['maggies'] - phot_mu)[mask] - var = (obs['maggies_unc'][mask])**2 - psamples = obs.get('phot_samples', None) - - if phot_noise is not None: - try: - filternames = obs['filters'].filternames - except(AttributeError): - filternames = [f.name for f in obs['filters']] - vectors['mask'] = mask - vectors['filternames'] = np.array(filternames) - vectors['phot_samples'] = psamples - try: - phot_noise.compute(**vectors) - if (f_outlier_phot == 0.0): - return phot_noise.lnlikelihood(phot_mu[mask], obs['maggies'][mask]) - - # disallow (correlated noise model + mixture model) - # and redefine errors - assert phot_noise.Sigma.ndim == 1 - var = phot_noise.Sigma - - except(LinAlgError): - return np.nan_to_num(-np.inf) - - # simple noise model - lnp = -0.5*( (delta**2/var) + np.log(2*np.pi*var) ) - if (f_outlier_phot == 0.0): - return lnp.sum() - else: - var_bad = var * (vectors["nsigma_outlier_phot"]**2) - lnp_bad = -0.5*( (delta**2/var_bad) + np.log(2*np.pi*var_bad) ) - lnp_tot = np.logaddexp(lnp + np.log(1-f_outlier_phot), lnp_bad + np.log(f_outlier_phot)) - - return lnp_tot.sum() - + try: + return obs.noise.lnlike(pred, obs, vectors=vectors) + except: + return basic_noise_model.lnlike(pred, obs, vectors=vectors) -def chi_phot(phot_mu, obs, **extras): - """Return a vector of chi values, for use in non-linear least-squares - algorithms. - :param phot_mu: - Model photometry, same units as the photometry in `obs`. - - :param obs: - An observational data dictionary, with the keys ``"maggies"`` and - ``"maggies_unc"``. If ``"maggies"`` is None then an empty array is - returned. - - :returns chi: - An array of noise weighted residuals, same length as the number of - unmasked phtometric points. 
+def compute_chi(pred, obs): """ - if obs['maggies'] is None: - return np.array([]) - - mask = obs.get('phot_mask', slice(None)) - delta = (obs['maggies'] - phot_mu)[mask] - unc = obs['maggies_unc'][mask] - chi = delta / unc - return chi - - -def chi_spec(spec_mu, obs, **extras): - """Return a vector of chi values, for use in non-linear least-squares - algorithms. + Parameters + ---------- + pred : ndarray of shape (ndata,) + The model prediction for this observation - :param spec_mu: - Model spectroscopy, same units as the photometry in `obs`. - - :param obs: - An observational data dictionary, with the keys ``"spectrum"`` and - ``"unc"``. If ``"spectrum"`` is None then an empty array is returned. - Optinally a ``"mask"`` boolean vector may be supplied that will be used - to index the residual vector. - - :returns chi: - An array of noise weighted residuals, same length as the number of - unmasked spectroscopic points. + obs : instance of Observation() + The observational data """ - if obs['spectrum'] is None: - return np.array([]) - mask = obs.get('mask', slice(None)) - delta = (obs['spectrum'] - spec_mu)[mask] - unc = obs['unc'][mask] - chi = delta / unc - return chi + chi = (pred - obs.flux) / obs.uncertainty + return chi[obs.mask] - -def write_log(theta, lnp_prior, lnp_spec, lnp_phot, d1, d2): - """Write all sorts of documentary info for debugging. 
- """ - print(theta) - print('model calc = {0}s, lnlike calc = {1}'.format(d1, d2)) - fstring = 'lnp = {0}, lnp_spec = {1}, lnp_phot = {2}' - values = [lnp_spec + lnp_phot + lnp_prior, lnp_spec, lnp_phot] - print(fstring.format(*values)) diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index 02f155a3..1f6b18de 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import numpy as np from scipy.linalg import cho_factor, cho_solve try: @@ -5,21 +7,104 @@ except(ImportError): pass -__all__ = ["NoiseModel", "NoiseModelKDE"] +__all__ = ["NoiseModel", "NoiseModelCov", "NoiseModelKDE"] + + +class NoiseModel: + + """This class allows for 1-d covariance matrix noise models without any + special kernels for covariance matrix construction. + """ + + f_outlier = 0 + n_sigma_outlier = 50 + + def __init__(self, f_outlier_name="f_outlier", n_sigma_name="nsigma_outlier"): + self.f_outlier_name = f_outlier_name + self.n_sigma_name = n_sigma_name + self.kernels = [] + + def update(self, **params): + self.f_outlier = params.get(self.f_outlier_name, 0) + self.n_sigma_outlier = params.get(self.n_sigma_name, 50) + [k.update(**params) for k in self.kernels] + + def lnlike(self, pred, obs, vectors={}): + + # Construct Sigma (and factorize if 2d) + vectors = self.populate_vectors(obs) + self.compute(**vectors) + + # Compute likelihood + if (self.f_outlier == 0.0): + # Let the noise model do it + lnp = self.lnlikelihood(pred[obs.mask], obs.flux[obs.mask]) + return lnp + elif self.f_outlier > 0: + # Use the noise model variance, but otherwise compute on our own + assert self.Sigma.ndim == 1, "Outlier modeling only available for uncorrelated errors" + delta = obs.flux[obs.mask] - pred[obs.mask] + var = self.Sigma + lnp = -0.5*((delta**2 / var) + np.log(2*np.pi*var)) + var_bad = var * (self.n_sigma_outlier**2) + lnp_bad = -0.5*((delta**2 / var_bad) + np.log(2*np.pi*var_bad)) + 
lnp_tot = np.logaddexp(lnp + np.log(1 - self.f_outlier), lnp_bad + np.log(self.f_outlier)) + return lnp_tot + else: + raise ValueError("f_outlier must be >= 0") + + def populate_vectors(self, obs, vectors={}): + # update vectors + vectors["mask"] = obs.mask + vectors["unc"] = obs.uncertainty + if obs.kind == "photometry": + vectors["filternames"] = obs.filternames + vectors["phot_samples"] = obs.get("phot_samples", None) + return vectors + + def construct_covariance(self, unc=[], mask=slice(None), **vectors): + self.Sigma = np.atleast_1d(unc[mask]**2) + + def compute(self, **vectors): + """Make a boring diagonal Covariance array + """ + self.construct_covariance(**vectors) + self.log_det = np.sum(np.log(self.Sigma)) + + def lnlikelihood(self, pred, data): + """Simple ln-likihood for diagonal covariance matrix. + """ + delta = data - pred + lnp = -0.5*(np.dot(delta**2, np.log(2*np.pi) / self.Sigma) + + self.log_det) + return lnp.sum() + +class NoiseModelCov(NoiseModel): + """This object allows for 1d or 2d covariance matrices constructed from kernels + """ -class NoiseModel(object): + def __init__(self, f_outlier_name="f_outlier", n_sigma_name="nsigma_outlier", + metric_name='', mask_name='mask', kernels=[], weight_by=[]): - def __init__(self, metric_name='', mask_name='mask', kernels=[], - weight_by=[]): + super().__init__(f_outlier_name=f_outlier_name, + n_sigma_name=n_sigma_name) assert len(kernels) == len(weight_by) self.kernels = kernels self.weight_names = weight_by self.metric_name = metric_name self.mask_name = mask_name - def update(self, **params): - [k.update(**params) for k in self.kernels] + def populate_vectors(self, vectors, obs): + # update vectors + vectors["mask"] = obs.mask + vectors["wavelength"] = obs.wavelength + vectors["unc"] = obs.uncertainty + vectors["flux"] = obs.flux + if obs.kind == "photometry": + vectors["filternames"] = obs.filternames + vectors["phot_samples"] = obs.get("phot_samples", None) + return vectors def 
construct_covariance(self, **vectors): """Construct a covariance matrix from a metric, a list of kernel @@ -58,38 +143,45 @@ def compute(self, check_finite=False, **vectors): and cache that. Also cache ``log_det``. """ self.Sigma = self.construct_covariance(**vectors) - if self.Sigma.ndim > 1: - self.factorized_Sigma = cho_factor(self.Sigma, overwrite_a=True, - check_finite=check_finite) + if self.Sigma.ndim == 1: + self.log_det = np.sum(np.log(self.Sigma)) + else: + self.factorized_Sigma = cho_factor(self.Sigma, overwrite_a=True, check_finite=check_finite) self.log_det = 2 * np.sum(np.log(np.diag(self.factorized_Sigma[0]))) assert np.isfinite(self.log_det) - else: - self.log_det = np.sum(np.log(self.Sigma)) - def lnlikelihood(self, phot_mu, phot_obs, check_finite=False, **extras): - """Compute the ln of the likelihood, using the current factorized - covariance matrix. + def lnlikelihood(self, prediction, data, check_finite=False): + """Compute the ln of the likelihood, using the current cached (and + factorized if non-diagonal) covariance matrix. - :param phot_mu: - Model photometry, same units as the photometry in `phot_obs`. - :param phot_obs: - Observed photometry, in linear flux units (i.e. maggies). + Parameters + ---------- + prediction : ndarray of float + Model flux, same units as `data`. + + data : ndarray of float + Observed flux, in linear flux units (i.e. maggies). 
+ + Returns + ------- + lnlike : float + The likelihood fo the data """ - residual = phot_obs - phot_mu + residual = data - prediction n = len(residual) assert n == self.Sigma.shape[0] - if self.Sigma.ndim > 1: - first_term = np.dot(residual, cho_solve(self.factorized_Sigma, - residual, check_finite=check_finite)) + if self.Sigma.ndim == 1: + first_term = np.dot(residual**2, 1.0 / self.Sigma) else: - first_term = np.dot(residual**2, 1.0/self.Sigma) + CinvD = cho_solve(self.factorized_Sigma, residual, check_finite=check_finite) + first_term = np.dot(residual, CinvD) lnlike = -0.5 * (first_term + self.log_det + n * np.log(2.*np.pi)) return lnlike -class NoiseModelKDE(object): +class NoiseModelKDE: def __init__(self, metric_name="phot_samples", mask_name="mask"): # , kernel=None, weight_by=None): diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index d92be6cc..da6163a1 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -62,25 +62,25 @@ def _available_parameters(self): return new_pars - def predict(self, theta, obslist=None, sps=None, sigma_spec=None, **extras): + def predict(self, theta, observations=None, sps=None, **extras): """Given a ``theta`` vector, generate a spectrum, photometry, and any extras (e.g. stellar mass), including any calibration effects. - :param theta: - ndarray of parameter values, of shape ``(ndim,)`` + Parameters + ---------- + theta : ndarray of shape ``(ndim,)`` + Vector of free model parameter values. - :param obslist: - A list of `Observation` instances. + observations : A list of `Observation` instances. + The data to predict - :param sps: + sps : An `sps` object to be used in the model generation. It must have the :py:func:`get_galaxy_spectrum` method defined. - :param sigma_spec: (optional) - The covariance matrix for the spectral noise. It is only used for - emission line marginalization. 
- - :returns predictions: (list of ndarrays) + Returns + ------- + predictions: (list of ndarrays) List of predictions for the given list of observations. If the observation kind is "spectrum" then this is the model spectrum for these @@ -92,12 +92,13 @@ def predict(self, theta, obslist=None, sps=None, sigma_spec=None, **extras): photometry for these parameters, for the filters specified in ``obs['filters']``. Units of maggies. - :returns extras: + extras : Any extra aspects of the model that are returned. Typically this will be `mfrac` the ratio of the surviving stellar mass to the stellar mass formed. """ - # generate and cache model spectrum and info + + # generate and cache intrinsic model spectrum and info self.set_parameters(theta) self._wave, self._spec, self._mfrac = sps.get_galaxy_spectrum(**self.params) self._zred = self.params.get('zred', 0) @@ -116,20 +117,18 @@ def predict(self, theta, obslist=None, sps=None, sigma_spec=None, **extras): # generate predictions for likelihood # this assumes all spectral datasets (if present) occur first # because they can change the line strengths during marginalization. - predictions = [self.predict_one(obs, sigma_spec=sigma_spec) - for obs in obslist] + predictions = [self.predict_obs(obs) for obs in observations] return predictions, self._mfrac - def predict_one(self, obs, sigma_spec=None): - self.cache_eline_parameters(obs) + def predict_obs(self, obs, sigma_spec=None): if obs.kind == "spectrum": - prediction = self.predict_spec(obs, sigma_spec) + prediction = self.predict_spec(obs) elif obs.kind == "photometry": prediction = self.predict_phot(obs["filters"]) return prediction - def predict_spec(self, obs, sigma_spec=None, **extras): + def predict_spec(self, obs, **extras): """Generate a prediction for the observed spectrum. 
This method assumes that the parameters have been set and that the following attributes are present and correct @@ -169,14 +168,19 @@ def predict_spec(self, obs, sigma_spec=None, **extras): including multiplication by the calibration vector. ndarray of shape ``(nwave,)`` in units of maggies. """ - # redshift wavelength + self._outwave = obs['wavelength'] + + # redshift model wavelength obs_wave = self.observed_wave(self._wave, do_wavecal=False) - self._outwave = obs.get('wavelength', obs_wave) - if self._outwave is None: - self._outwave = obs_wave + + # Set up for emission lines + self.cache_eline_parameters(obs) # --- smooth and put on output wavelength grid --- + # physical smoothing smooth_spec = self.smoothspec(obs_wave, self._norm_spec) + # instrumental smoothing (accounting for library resolution) + smooth_spec = obs.instrumental_smoothing(self._outwave, smooth_spec, libres=0) # --- add fixed lines if necessary --- emask = self._fix_eline_pixelmask @@ -194,7 +198,12 @@ def predict_spec(self, obs, sigma_spec=None, **extras): # --- fit and add lines if necessary --- emask = self._fit_eline_pixelmask if emask.any(): - self._fit_eline_spec = self.fit_el(obs, calibrated_spec, sigma_spec) + # We need the spectroscopic covariance matrix to do emission line optimization and marginalization + sigma_spec = None + # FIXME: do this only if the noise model is non-trivial, and make sure masking is consistent + #vectors = obs.noise.populate_vectors(obs) + #sigma_spec = obs.noise.construct_covariance(**vectors) + self._fit_eline_spec = self.get_el(obs, calibrated_spec, sigma_spec) calibrated_spec[emask] += self._fit_eline_spec.sum(axis=1) # --- cache intrinsic spectrum --- @@ -228,9 +237,7 @@ def predict_phot(self, filters): # generate photometry w/o emission lines obs_wave = self.observed_wave(self._wave, do_wavecal=False) flambda = self._norm_spec * lightspeed / obs_wave**2 * (3631*jansky_cgs) - phot = 10**(-0.4 * np.atleast_1d(getSED(obs_wave, flambda, filters))) - # 
TODO: below is faster for sedpy > 0.2.0 - #phot = np.atleast_1d(getSED(obs_wave, flambda, filters, linear_flux=True)) + phot = np.atleast_1d(getSED(obs_wave, flambda, filters, linear_flux=True)) # generate emission-line photometry if (self._want_lines & self._need_lines): @@ -670,10 +677,16 @@ def spec_calibration(self, theta=None, obs=None, spec=None, **kwargs): spectrum, conditional on all other parameters. If emission lines are being marginalized out, they are excluded from the least-squares fit. - :param obs: - Instance of `Spectrum` + Parameters + ---------- + obs : Instance of `Spectrum` - :returns cal: + spec : ndarray of shape (nwave,) + The model spectrum. + + Returns + ------- + cal : ndarray of shape (nwave,) A polynomial given by :math:`\sum_{m=0}^M a_{m} * T_m(x)`. """ if theta is not None: diff --git a/prospect/utils/observation.py b/prospect/utils/observation.py index da159e8c..da1e3b69 100644 --- a/prospect/utils/observation.py +++ b/prospect/utils/observation.py @@ -3,9 +3,16 @@ import json import numpy as np +from sedpy.observate import FilterSet +from sedpy.smoothing import smoothspec + +from ..likelihood.noise_model import NoiseModel + + __all__ = ["Observation", "Spectrum", "Photometry", "from_oldstyle"] + class NumpyEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, np.ndarray): @@ -17,6 +24,15 @@ def default(self, obj): class Observation: + """ + Attributes + ---------- + flux : + uncertainty : + mask : + noise : + """ + logify_spectrum = False alias = {} @@ -24,12 +40,14 @@ def __init__(self, flux=None, uncertainty=None, mask=slice(None), + noise=NoiseModel(), **kwargs ): self.flux = flux self.uncertainty = uncertainty self.mask = mask + self.noise = noise self.from_oldstyle(**kwargs) def __getitem__(self, item): @@ -70,6 +88,7 @@ def rectify(self): (self.uncertainty > 0)) assert self.ndof > 0, "No valid data to fit: check the sign of the masks." 
+ assert hasattr(self, "noise") def render(self, wavelength, spectrum): raise(NotImplementedError) @@ -101,13 +120,10 @@ class Photometry(Observation): def __init__(self, filters=[], **kwargs): super(Photometry, self).__init__(**kwargs) - self.filters = filters - - def render(self, wavelength, spectrum): - w, s = wavelength, spectrum - mags = [f.ab_mag(w, s, **self.render_kwargs) - for f in self.filters] - return 10**(-0.4 * np.array(mags)) + self.filterset = FilterSet(filters) + # filters on the gridded resolution + self.filters = [f for f in self.filterset.filters] + self.filternames = np.array([f.name for f in self.filters]) @property def wavelength(self): @@ -148,12 +164,19 @@ def __init__(self, self.wavelength = wavelength self.resolution = resolution self.calibration = calibration + self.instrument_smoothing_parameters = dict(smoothtype="R", fftsmooth=True) + + def instrumental_smoothing(self, inwave, influx, libres=0): + if self.resolution: + out = smoothspec(inwave, spec, + self.resolution, + outwave=self.wavelength, + **self.instrument_smoothing_parameters) + else: + #out = np.interp(self.wavelength, inwave, influx) + out = influx - def render(self, wavelength, spectrum): - if self.ndata > 0: - wave = self.wavelength - spec = np.interp(wave, wavelength, spectrum) - return wave, spec + return out def to_oldstyle(self): obs = vars(self) diff --git a/tests/test_predict.py b/tests/test_predict.py index a14d6f20..451103ae 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import sys import numpy as np from sedpy.observate import load_filters @@ -9,8 +10,10 @@ from prospect.utils.observation import Spectrum, Photometry -def build_model(): +def build_model(add_neb=False): model_params = templates.TemplateLibrary["parametric_sfh"] + if add_neb: + model_params.update(templates.TemplateLibrary["nebular_emission"]) return SpecModel(model_params) @@ -19,9 +22,9 @@ def build_obs(multispec=True): 
wmax = 7000 wsplit = wmax - N * multispec - filterlist = load_filters([f"sdss_{b}0" for b in "ugriz"]) - Nf = len(filterlist) - phot = [Photometry(filters=filterlist, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)] + fnames = list([f"sdss_{b}0" for b in "ugriz"]) + Nf = len(fnames) + phot = [Photometry(filters=fnames, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)] spec = [Spectrum(wavelength=np.linspace(4000, wsplit, N), flux=np.ones(N), uncertainty=np.ones(N) / 10, mask=slice(None))] @@ -47,10 +50,8 @@ def build_sps(): model = build_model() sps = build_sps() - #sys.exit() - predictions_single, mfrac = model.predict(model.theta, obslist=obslist_single, sps=sps) - #sys.exit() - predictions, mfrac = model.predict(model.theta, obslist=obslist, sps=sps) + predictions_single, mfrac = model.predict(model.theta, observations=obslist_single, sps=sps) + predictions, mfrac = model.predict(model.theta, observations=obslist, sps=sps) import matplotlib.pyplot as pl fig, ax = pl.subplots() @@ -61,3 +62,14 @@ def build_sps(): else: ax.plot(o.wavelength, p) + # -- TESting --- + observations = obslist + arr = np.zeros(model.ndim) + from prospect.likelihood.likelihood import compute_lnlike + from prospect.fitting import lnprobfn + + sys.exit() + #%timeit model.prior_product(model.theta) + #%timeit predictions, x = model.predict(model.theta + np.random.uniform(0, 3) * arr, observations=obslist, sps=sps) + #%timeit lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs in zip(predictions, observations)] + #%timeit lnp = lnprobfn(model.theta + np.random.uniform(0, 3) * arr, model=model, observations=obslist, sps=sps) \ No newline at end of file From 4bb0e830e9832f201c650683b3ee7977e18a1afc Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 2 May 2022 11:50:55 -0400 Subject: [PATCH 04/33] Remove SedModel and it's subclasses; cache library resolution if avaialble. 
--- prospect/fitting/fitting.py | 20 +--- prospect/models/sedmodel.py | 224 +++--------------------------------- tests/test_predict.py | 4 +- 3 files changed, 18 insertions(+), 230 deletions(-) diff --git a/prospect/fitting/fitting.py b/prospect/fitting/fitting.py index e14ee930..7356d91d 100755 --- a/prospect/fitting/fitting.py +++ b/prospect/fitting/fitting.py @@ -24,13 +24,8 @@ ] -<<<<<<< HEAD -def lnprobfn(theta, model=None, observations=None, sps=None, noises=None, - residuals=False, nested=False, negative=False, verbose=False): -======= def lnprobfn(theta, model=None, observations=None, sps=None, - residuals=False, nested=False, verbose=False): ->>>>>>> 5617c8c (fitting ubdates for observation lists; dosctring modernization.) + residuals=False, nested=False, negative=False, verbose=False): """Given a parameter vector and optionally a dictionary of observational ata and a model object, return the matural log of the posterior. This requires that an sps object (and if using spectra and gaussian processes, a @@ -287,12 +282,8 @@ def run_minimize(observations=None, model=None, sps=None, lnprobfn=lnprobfn, residuals = False args = [] -<<<<<<< HEAD - loss = argfix(lnprobfn, obs=obs, model=model, sps=sps, - noise=noise, residuals=residuals, negative=True) -======= - loss = argfix(lnprobfn, observations=observations, model=model, sps=sps, residuals=residuals) ->>>>>>> 5617c8c (fitting ubdates for observation lists; dosctring modernization.) 
+ loss = argfix(lnprobfn, observations=observations, model=model, sps=sps, + residuals=residuals, negative=True) minimizer = minimize_wrapper(algorithm, loss, [], min_method, min_opts) qinit = minimizer_ball(initial, nmin, model) @@ -420,13 +411,8 @@ def run_emcee(observations, model, sps, lnprobfn=lnprobfn, return sampler, ts -<<<<<<< HEAD def run_dynesty(obs, model, sps, noise, lnprobfn=lnprobfn, pool=None, nested_target_n_effective=10000, **kwargs): -======= -def run_dynesty(obs, model, sps, lnprobfn=lnprobfn, - pool=None, nested_posterior_thresh=0.05, **kwargs): ->>>>>>> 5617c8c (fitting ubdates for observation lists; dosctring modernization.) """Thin wrapper on :py:class:`prospect.fitting.nested.run_dynesty_sampler` Parameters diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index da6163a1..94d9aba6 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -22,7 +22,7 @@ __all__ = ["SpecModel", "PolySpecModel", "SplineSpecModel", "LineSpecModel", "AGNSpecModel", - "SedModel", "PolySedModel", "PolyFitModel"] + "PolyFitModel"] class SpecModel(ProspectorParams): @@ -103,6 +103,7 @@ def predict(self, theta, observations=None, sps=None, **extras): self._wave, self._spec, self._mfrac = sps.get_galaxy_spectrum(**self.params) self._zred = self.params.get('zred', 0) self._eline_wave, self._eline_lum = sps.get_galaxy_elines() + self._library_resolution = getattr(sps, "spectral_resolution", 0.0) # Flux normalize self._norm_spec = self._spec * self.flux_norm() @@ -121,7 +122,7 @@ def predict(self, theta, observations=None, sps=None, **extras): return predictions, self._mfrac - def predict_obs(self, obs, sigma_spec=None): + def predict_obs(self, obs): if obs.kind == "spectrum": prediction = self.predict_spec(obs) elif obs.kind == "photometry": @@ -180,7 +181,8 @@ def predict_spec(self, obs, **extras): # physical smoothing smooth_spec = self.smoothspec(obs_wave, self._norm_spec) # instrumental smoothing (accounting for library 
resolution) - smooth_spec = obs.instrumental_smoothing(self._outwave, smooth_spec, libres=0) + smooth_spec = obs.instrumental_smoothing(self._outwave, smooth_spec, + libres=self._library_resolution) # --- add fixed lines if necessary --- emask = self._fix_eline_pixelmask @@ -1083,201 +1085,10 @@ def predict_aline_spec(self, line_indices, wave): return aline_spec -class SedModel(ProspectorParams): +class PolyFitModel(SpecModel): - """A subclass of :py:class:`ProspectorParams` that passes the models - through to an ``sps`` object and returns spectra and photometry, including - optional spectroscopic calibration and sky emission. - """ - - def predict(self, theta, obs=None, sps=None, **extras): - """Given a ``theta`` vector, generate a spectrum, photometry, and any - extras (e.g. stellar mass), including any calibration effects. - - :param theta: - ndarray of parameter values, of shape ``(ndim,)`` - - :param obs: - An observation dictionary, containing the output wavelength array, - the photometric filter lists, and the observed fluxes and - uncertainties thereon. Assumed to be the result of - :py:func:`utils.obsutils.rectify_obs` - - :param sps: - An `sps` object to be used in the model generation. It must have - the :py:func:`get_spectrum` method defined. - - :param sigma_spec: (optional, unused) - The covariance matrix for the spectral noise. It is only used for - emission line marginalization. - - :returns spec: - The model spectrum for these parameters, at the wavelengths - specified by ``obs['wavelength']``, including multiplication by the - calibration vector. Units of maggies - - :returns phot: - The model photometry for these parameters, for the filters - specified in ``obs['filters']``. Units of maggies. - - :returns extras: - Any extra aspects of the model that are returned. Typically this - will be `mfrac` the ratio of the surviving stellar mass to the - stellar mass formed. 
- """ - s, p, x = self.sed(theta, obs, sps=sps, **extras) - self._speccal = self.spec_calibration(obs=obs, **extras) - if obs.get('logify_spectrum', False): - s = np.log(s) + np.log(self._speccal) - else: - s *= self._speccal - return s, p, x - - def sed(self, theta, obs=None, sps=None, **kwargs): - """Given a vector of parameters ``theta``, generate a spectrum, photometry, - and any extras (e.g. surviving mass fraction), ***not** including any - instrument calibration effects. The intrinsic spectrum thus produced is - cached in `_spec` attribute - - :param theta: - ndarray of parameter values. - - :param obs: - An observation dictionary, containing the output wavelength array, - the photometric filter lists, and the observed fluxes and - uncertainties thereon. Assumed to be the result of - :py:func:`utils.obsutils.rectify_obs` - - :param sps: - An `sps` object to be used in the model generation. It must have - the :py:func:`get_spectrum` method defined. - - :returns spec: - The model spectrum for these parameters, at the wavelengths - specified by ``obs['wavelength']``. Default units are maggies, and - the calibration vector is **not** applied. - - :returns phot: - The model photometry for these parameters, for the filters - specified in ``obs['filters']``. Units are maggies. - - :returns extras: - Any extra aspects of the model that are returned. Typically this - will be `mfrac` the ratio of the surviving stellar mass to the - steallr mass formed. - """ - self.set_parameters(theta) - spec, phot, extras = sps.get_spectrum(outwave=obs['wavelength'], - filters=obs['filters'], - component=obs.get('component', -1), - lnwavegrid=obs.get('lnwavegrid', None), - **self.params) - - spec *= obs.get('normalization_guess', 1.0) - # Remove negative fluxes. 
- try: - tiny = 1.0 / len(spec) * spec[spec > 0].min() - spec[spec < tiny] = tiny - except: - pass - spec = (spec + self.sky(obs)) - self._spec = spec.copy() - return spec, phot, extras - - def sky(self, obs): - """Model for the *additive* sky emission/absorption""" - return 0. - - def spec_calibration(self, theta=None, obs=None, **kwargs): - """Implements an overall scaling of the spectrum, given by the - parameter ``'spec_norm'`` - - :returns cal: (float) - A scalar multiplicative factor that gives the ratio between the true - spectrum and the observed spectrum - """ - if theta is not None: - self.set_parameters(theta) - - return 1.0 * self.params.get('spec_norm', 1.0) - - def wave_to_x(self, wavelength=None, mask=slice(None), **extras): - """Map unmasked wavelengths to the interval (-1, 1). Masked wavelengths may have x>1, x<-1 - - :param wavelength: - The input wavelengths. ndarray of shape ``(nwave,)`` - - :param mask: optional - The mask. slice or boolean array with ``True`` for unmasked elements. - The interval (-1, 1) will be defined only by unmasked wavelength points - - :returns x: - The wavelength vector, remapped to the interval (-1, 1). - ndarray of same shape as ``wavelength`` - """ - x = wavelength - (wavelength[mask]).min() - x = 2.0 * (x / (x[mask]).max()) - 1.0 - return x - - def mean_model(self, theta, obs, sps=None, sigma_spec=None, **extras): - """Legacy wrapper around predict() - """ - return self.predict(theta, obs, sps=sps, sigma=sigma_spec, **extras) - - -class PolySedModel(SedModel): - - """This is a subclass of SedModel that replaces the calibration vector with - the maximum likelihood chebyshev polynomial describing the difference - between the observed and the model spectrum. - """ - - def spec_calibration(self, theta=None, obs=None, **kwargs): - """Implements a Chebyshev polynomial calibration model. 
This uses - least-squares to find the maximum-likelihood Chebyshev polynomial of a - certain order describing the ratio of the observed spectrum to the - model spectrum, conditional on all other parameters, using least - squares. The first coefficient is always set to 1, as the overall - normalization is controlled by ``spec_norm``. - - :returns cal: - A polynomial given by 'spec_norm' * (1 + \sum_{m=1}^M a_{m} * T_m(x)). - """ - if theta is not None: - self.set_parameters(theta) - - norm = self.params.get('spec_norm', 1.0) - order = np.squeeze(self.params.get('polyorder', 0)) - polyopt = ((order > 0) & - (obs.get('spectrum', None) is not None)) - if polyopt: - mask = obs.get('mask', slice(None)) - # map unmasked wavelengths to the interval -1, 1 - # masked wavelengths may have x>1, x<-1 - x = self.wave_to_x(obs["wavelength"], mask) - y = (obs['spectrum'] / self._spec)[mask] / norm - 1.0 - yerr = (obs['unc'] / self._spec)[mask] / norm - yvar = yerr**2 - A = chebvander(x[mask], order)[:, 1:] - ATA = np.dot(A.T, A / yvar[:, None]) - reg = self.params.get('poly_regularization', 0.) 
- if np.any(reg > 0): - ATA += reg**2 * np.eye(order) - ATAinv = np.linalg.inv(ATA) - c = np.dot(ATAinv, np.dot(A.T, y / yvar)) - Afull = chebvander(x, order)[:, 1:] - poly = np.dot(Afull, c) - self._poly_coeffs = c - else: - poly = 0.0 - - return (1.0 + poly) * norm - - -class PolyFitModel(SedModel): - - """This is a subclass of *SedModel* that generates the multiplicative - calibration vector as a Chebyshev polynomial described by the + """This is a subclass of :py:class:`SpecModel` that generates the + multiplicative calibration vector as a Chebyshev polynomial described by the ``'poly_coeffs'`` parameter of the model, which may be free (fittable) """ @@ -1297,8 +1108,7 @@ def spec_calibration(self, theta=None, obs=None, **kwargs): :returns cal: If ``params["cal_type"]`` is ``"poly"``, a polynomial given by - ``'spec_norm'`` :math:`\times (1 + \Sum_{m=1}^M```'poly_coeffs'[m-1]``:math:` \times T_n(x))`. - Otherwise, the exponential of a Chebyshev polynomial. + :math:`\times (\Sum_{m=0}^M```'poly_coeffs'[m]``:math:` \times T_n(x))`. """ if theta is not None: self.set_parameters(theta) @@ -1308,20 +1118,12 @@ def spec_calibration(self, theta=None, obs=None, **kwargs): # map unmasked wavelengths to the interval -1, 1 # masked wavelengths may have x>1, x<-1 x = self.wave_to_x(obs["wavelength"], mask) - # get coefficients. Here we are setting the first term to 0 so we - # can deal with it separately for the exponential and regular - # multiplicative cases - c = np.insert(self.params['poly_coeffs'], 0, 0) + # get coefficients. 
+ c = self.params['poly_coeffs'] poly = chebval(x, c) - # switch to have spec_norm be multiplicative or additive depending - # on whether the calibration model is multiplicative in exp^poly or - # just poly - if self.params.get('cal_type', 'exp_poly') == 'poly': - return (1.0 + poly) * self.params.get('spec_norm', 1.0) - else: - return np.exp(self.params.get('spec_norm', 0) + poly) + return poly else: - return 1.0 * self.params.get('spec_norm', 1.0) + return 1.0 def ln_mvn(x, mean=None, cov=None): diff --git a/tests/test_predict.py b/tests/test_predict.py index 451103ae..70c1e61a 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -13,7 +13,7 @@ def build_model(add_neb=False): model_params = templates.TemplateLibrary["parametric_sfh"] if add_neb: - model_params.update(templates.TemplateLibrary["nebular_emission"]) + model_params.update(templates.TemplateLibrary["nebular"]) return SpecModel(model_params) @@ -47,7 +47,7 @@ def build_sps(): if __name__ == "__main__": obslist_single = build_obs(multispec=False) obslist = build_obs() - model = build_model() + model = build_model(add_neb=True) sps = build_sps() predictions_single, mfrac = model.predict(model.theta, observations=obslist_single, sps=sps) From 563e713fa9937050bd1ea8982b29e86656521e5c Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 2 May 2022 14:53:44 -0400 Subject: [PATCH 05/33] Move observation submodule to new data module; Numerous tweaks to observation generation from old style dicts; Changes to test_eline for new predict() api. 
--- prospect/data/__init__.py | 6 + prospect/{utils => data}/observation.py | 97 +++++++--- prospect/{utils => data}/obsutils.py | 0 prospect/fitting/fitting.py | 10 +- prospect/models/__init__.py | 9 +- prospect/models/model_setup.py | 237 ------------------------ pyproject.toml | 2 +- tests/test_eline.py | 48 ++--- tests/test_predict.py | 38 ++-- 9 files changed, 129 insertions(+), 318 deletions(-) create mode 100644 prospect/data/__init__.py rename prospect/{utils => data}/observation.py (63%) rename prospect/{utils => data}/obsutils.py (100%) delete mode 100644 prospect/models/model_setup.py diff --git a/prospect/data/__init__.py b/prospect/data/__init__.py new file mode 100644 index 00000000..f4bb7dc4 --- /dev/null +++ b/prospect/data/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + +from .observation import Photometry, Spectrum, from_oldstyle + +__all__ = ["Photometry", "Spectrum", + "from_oldstyle"] diff --git a/prospect/utils/observation.py b/prospect/data/observation.py similarity index 63% rename from prospect/utils/observation.py rename to prospect/data/observation.py index da1e3b69..85936802 100644 --- a/prospect/utils/observation.py +++ b/prospect/data/observation.py @@ -24,7 +24,8 @@ def default(self, obj): class Observation: - """ + """Data to be predicted (and fit) + Attributes ---------- flux : @@ -41,6 +42,7 @@ def __init__(self, uncertainty=None, mask=slice(None), noise=NoiseModel(), + name="ObsA", **kwargs ): @@ -48,6 +50,7 @@ def __init__(self, self.uncertainty = uncertainty self.mask = mask self.noise = noise + self.name = name self.from_oldstyle(**kwargs) def __getitem__(self, item): @@ -70,14 +73,18 @@ def from_oldstyle(self, **kwargs): if k in kwargs: setattr(self, v, kwargs[k]) - def rectify(self): - """Make sure required attributes are present and have the appropriate - sizes. Also auto-masks non-finite data or negative uncertainties. 
+ def rectify(self, for_fitting=False): + """Make sure required attributes for fitting are present and have the + appropriate sizes. Also auto-masks non-finite data or negative + uncertainties. """ + assert self.wavelength.ndim == 1, "`wavelength` is not 1-d array" assert self.ndata > 0, "no wavelength points supplied!" - assert len(self.wavelength) == len(self.flux), "Flux array not same shape as wavelength" - assert len(self.wavelength) == len(self.uncertainty), "Uncertainty array not same shape as wavelength" + assert self.flux is not None, " No data." + assert self.uncertainty is not None, "No uncertainties." + assert len(self.wavelength) == len(self.flux), "Flux array not same shape as wavelength." + assert len(self.wavelength) == len(self.uncertainty), "Uncertainty array not same shape as wavelength." # make mask array with automatic filters marr = np.zeros(self.ndata, dtype=bool) @@ -87,7 +94,7 @@ def rectify(self): (np.isfinite(self.uncertainty)) & (self.uncertainty > 0)) - assert self.ndof > 0, "No valid data to fit: check the sign of the masks." + assert self.ndof == 0, f"{self.__repr__()} has no valid data to fit: check the sign of the masks." assert hasattr(self, "noise") def render(self, wavelength, spectrum): @@ -95,10 +102,12 @@ def render(self, wavelength, spectrum): @property def ndof(self): - return int(self.mask.sum()) + # TODO: cache this? + return int(np.sum(np.ones(self.ndata)[self.mask])) @property def ndata(self): + # TODO: cache this? 
if self.wavelength is None: return 0 else: @@ -117,13 +126,18 @@ class Photometry(Observation): filters="filters", phot_mask="mask") - def __init__(self, filters=[], **kwargs): + def __init__(self, filters=[], name="PhotA", **kwargs): + + if type(filters[0]) is str: + self.filternames = filters + else: + self.filternames = [f.name for f in filters] - super(Photometry, self).__init__(**kwargs) - self.filterset = FilterSet(filters) + self.filterset = FilterSet(self.filternames) # filters on the gridded resolution self.filters = [f for f in self.filterset.filters] - self.filternames = np.array([f.name for f in self.filters]) + + super(Photometry, self).__init__(name=name, **kwargs) @property def wavelength(self): @@ -149,10 +163,13 @@ def __init__(self, wavelength=None, resolution=None, calibration=None, + name="SpecA", **kwargs): """ - :param resolution: (optional, default: None) + Parameters + ---------- + resolution : (optional, default: None) Instrumental resolution at each wavelength point in units of km/s dispersion (:math:`= c \, \sigma_\lambda / \lambda = c \, \FWHM_\lambda / 2.355 / \lambda = c / (2.355 \, R_\lambda)` where :math:`c=2.998e5 {\rm km}/{\rm s}` @@ -160,22 +177,45 @@ def __init__(self, :param calibration: not sure yet .... 
""" - super(Spectrum, self).__init__(**kwargs) + super(Spectrum, self).__init__(name=name, **kwargs) self.wavelength = wavelength self.resolution = resolution self.calibration = calibration - self.instrument_smoothing_parameters = dict(smoothtype="R", fftsmooth=True) - - def instrumental_smoothing(self, inwave, influx, libres=0): - if self.resolution: - out = smoothspec(inwave, spec, - self.resolution, - outwave=self.wavelength, - **self.instrument_smoothing_parameters) - else: - #out = np.interp(self.wavelength, inwave, influx) - out = influx + self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) + + def instrumental_smoothing(self, obswave, influx, libres=0): + """Smooth a spectrum by the instrumental resolution, optionally + accounting (in quadrature) the intrinsic library resolution. + Parameters + ---------- + obswave : ndarray + Observed frame wavelengths, in units of AA + + influx : ndarray + Flux array + + libres : float or ndarray + Library resolution in units of km/ (dispersion) to be subtracted from the smoothing kernel. + + Returns + ------- + outflux : ndarray + If instrument resolution is not None, this is the smoothed flux on + the observed ``wavelength`` grid. If resolution is None, this just + passes ``influx`` right back again. 
+ """ + if self.resolution is None: + # no-op + return influx + + if libres: + kernel = np.sqrt(self.resolution**2 - libres**2) + else: + kernel = self.resolution + out = smoothspec(obswave, influx, kernel, + outwave=self.wavelength, + **self.instrument_smoothing_parameters) return out def to_oldstyle(self): @@ -185,7 +225,10 @@ def to_oldstyle(self): return obs -def from_oldstyle(obs): +def from_oldstyle(obs, **kwargs): """Convert from an oldstyle dictionary to a list of observations """ - return [Spectrum().from_oldstyle(obs), Photometry().from_oldstyle(obs)] \ No newline at end of file + obslist = [Spectrum(**obs), Photometry(**obs)] + #[o.rectify() for o in obslist] + + return obslist diff --git a/prospect/utils/obsutils.py b/prospect/data/obsutils.py similarity index 100% rename from prospect/utils/obsutils.py rename to prospect/data/obsutils.py diff --git a/prospect/fitting/fitting.py b/prospect/fitting/fitting.py index 7356d91d..4afa53bd 100755 --- a/prospect/fitting/fitting.py +++ b/prospect/fitting/fitting.py @@ -36,7 +36,7 @@ def lnprobfn(theta, model=None, observations=None, sps=None, theta : ndarray of shape ``(ndim,)`` Input parameter vector - model : instance of the :py:class:`prospect.models.SedModel` + model : instance of the :py:class:`prospect.models.SpecModel` The model parameterization and parameter state. Must have :py:meth:`predict()` defined @@ -131,7 +131,7 @@ def fit_model(observations, model, sps, lnprobfn=lnprobfn, observations : list of :py:class:`observate.Observation` instances The data to be fit. - model : instance of the :py:class:`prospect.models.SedModel` + model : instance of the :py:class:`prospect.models.SpecModel` The model parameterization and parameter state. It will be passed to ``lnprobfn``. @@ -226,7 +226,7 @@ def run_minimize(observations=None, model=None, sps=None, lnprobfn=lnprobfn, observations : list of :py:class:`observate.Observation` instances The data to be fit. 
- model : instance of the :py:class:`prospect.models.SedModel` + model : instance of the :py:class:`prospect.models.SpecModel` The model parameterization and parameter state. It will be passed to ``lnprobfn``. @@ -315,7 +315,7 @@ def run_emcee(observations, model, sps, lnprobfn=lnprobfn, observations : list of :py:class:`observate.Observation` instances The data to be fit. - model : instance of the :py:class:`prospect.models.SedModel` + model : instance of the :py:class:`prospect.models.SpecModel` The model parameterization and parameter state. It will be passed to ``lnprobfn``. @@ -420,7 +420,7 @@ def run_dynesty(obs, model, sps, noise, lnprobfn=lnprobfn, observations : list of :py:class:`observate.Observation` instances The data to be fit. - model : instance of the :py:class:`prospect.models.SedModel` + model : instance of the :py:class:`prospect.models.SpecModel` The model parameterization and parameter state. It will be passed to ``lnprobfn``. diff --git a/prospect/models/__init__.py b/prospect/models/__init__.py index 407c21ef..59d362c9 100644 --- a/prospect/models/__init__.py +++ b/prospect/models/__init__.py @@ -9,7 +9,10 @@ from .sedmodel import PolySpecModel, SplineSpecModel from .sedmodel import AGNSpecModel, LineSpecModel -__all__ = ["ProspectorParams", "SpecModel", + +__all__ = ["ProspectorParams", + "SpecModel", "PolySpecModel", "SplineSpecModel", - "LineSpecModel", "AGNSpecModel", - "SedModel"] + "LineSpecModel", "AGNSpecModel" + ] + diff --git a/prospect/models/model_setup.py b/prospect/models/model_setup.py deleted file mode 100644 index 1a1063e3..00000000 --- a/prospect/models/model_setup.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys, os, getopt, json, warnings -from copy import deepcopy -import numpy as np -from . 
import parameters -from ..utils.obsutils import fix_obs - -"""This module has methods to take a .py file containing run parameters, model -parameters and other info and return a run_params dictionary, an obs -dictionary, and a model. It also has methods to parse command line options and -return an sps object and noise objects. - -Most of the load_ methods are just really shallow wrappers on -```import_module_from_file(param_file).load_(**kwargs)``` and could probably -be done away with at this point, as they add a mostly useless layer of -abstraction. Kept here for future flexibility. -""" - -__all__ = ["parse_args", "import_module_from_file", "get_run_params", - "load_model", "load_obs", "load_sps", "load_gp", "show_syntax"] - - -deprecation_msg = ("Use argparse based operation; usage via prospector_*.py " - "scripts will be disabled in the future.") - - -def parse_args(argv, argdict={}): - """Parse command line arguments, allowing for optional arguments. - Simple/Fragile. - """ - warnings.warn(deprecation_msg, FutureWarning) - args = [sub for arg in argv[1:] for sub in arg.split('=')] - for i, a in enumerate(args): - if (a[:2] == '--'): - abare = a[2:] - if abare == 'help': - show_syntax(argv, argdict) - sys.exit() - else: - continue - if abare in argdict.keys(): - apo = deepcopy(args[i+1]) - func = type(argdict[abare]) - try: - argdict[abare] = func(apo) - if func is bool: - argdict[abare] = apo in ['True', 'true', 'T', 't', 'yes'] - except TypeError: - argdict[abare] = apo - return argdict - - -def get_run_params(param_file=None, argv=None, **kwargs): - """Get a run_params dictionary from the param_file (if passed) otherwise - return the kwargs dictionary. - - The order of precedence of parameter specification locations is: - * 1. param_file (lowest) - * 2. kwargs passsed to this function - * 3. 
command line arguments - """ - warnings.warn(deprecation_msg, FutureWarning) - rp = {} - if param_file is None: - ext = "" - else: - ext = param_file.split('.')[-1] - if ext == 'py': - setup_module = import_module_from_file(param_file) - rp = deepcopy(setup_module.run_params) - elif ext == 'json': - rp, mp = parameters.read_plist(param_file) - if kwargs is not None: - kwargs.update(rp) - rp = kwargs - if argv is not None: - rp = parse_args(argv, argdict=rp) - rp['param_file'] = param_file - - return rp - - -def load_sps(param_file=None, **kwargs): - """Return an ``sps`` object which is used to hold spectral libraries, - perform interpolations, convolutions, etc. - """ - warnings.warn(deprecation_msg, FutureWarning) - ext = param_file.split('.')[-1] - assert ext == 'py' - setup_module = import_module_from_file(param_file) - - if hasattr(setup_module, 'load_sps'): - builder = setup_module.load_sps - elif hasattr(setup_module, 'build_sps'): - builder = setup_module.build_sps - else: - warnings.warn("Could not find load_sps or build_sps methods in param_file") - return None - - sps = builder(**kwargs) - - return sps - - -def load_gp(param_file=None, **kwargs): - """Return two Gaussian Processes objects, either using BSFH's internal GP - objects or George. - - :returns gp_spec: - The gaussian process object to use for the spectroscopy. - - :returns gp_phot: - The gaussian process object to use for the photometry. 
- """ - warnings.warn(deprecation_msg, FutureWarning) - ext = param_file.split('.')[-1] - assert ext == "py" - setup_module = import_module_from_file(param_file) - - if hasattr(setup_module, 'load_gp'): - builder = setup_module.load_gp - elif hasattr(setup_module, 'build_noise'): - builder = setup_module.build_noise - else: - warnings.warn("Could not find load_gp or build_noise methods in param_file") - return None, None - - spec_noise, phot_noise = builder(**kwargs) - - return spec_noise, phot_noise - - -def load_model(param_file=None, **kwargs): - """Load the model object from a model config list given in the config file. - - :returns model: - An instance of the parameters.ProspectorParams object which has - been configured - """ - warnings.warn(deprecation_msg, FutureWarning) - ext = param_file.split('.')[-1] - assert ext == 'py' - setup_module = import_module_from_file(param_file) - #mp = deepcopy(setup_module.model_params) - - if hasattr(setup_module, 'load_model'): - builder = setup_module.load_model - elif hasattr(setup_module, 'build_model'): - builder = setup_module.build_model - else: - warnings.warn("Could not find load_model or build_model methods in param_file") - return None - - model = builder(**kwargs) - - return model - - -def load_obs(param_file=None, **kwargs): - """Load the obs dictionary using the `obs` attribute or methods in - ``param_file``. kwargs are passed to these methods and ``fix_obs()`` - - :returns obs: - A dictionary of observational data. 
- """ - warnings.warn(deprecation_msg, FutureWarning) - ext = param_file.split('.')[-1] - obs = None - assert ext == 'py' - print('reading py script {}'.format(param_file)) - setup_module = import_module_from_file(param_file) - - if hasattr(setup_module, 'obs'): - return fix_obs(deepcopy(setup_module.obs)) - if hasattr(setup_module, 'load_obs'): - builder = setup_module.load_obs - elif hasattr(setup_module, 'build_obs'): - builder = setup_module.build_obs - else: - warnings.warn("Could not find load_obs or build_obs methods in param_file") - return None - - obs = builder(**kwargs) - obs = fix_obs(obs, **kwargs) - - return obs - - -def import_module_from_file(path_to_file): - """This has to break everything ever, right? - """ - from importlib import import_module - path, filename = os.path.split(path_to_file) - modname = filename.replace('.py', '') - sys.path.insert(0, path) - user_module = import_module(modname) - sys.path.remove(path) - return user_module - - -def import_module_from_string(source, name, add_to_sys_modules=True): - """Well this seems dangerous. - """ - import imp - user_module = imp.new_module(name) - exec(source, user_module.__dict__) - if add_to_sys_modules: - sys.modules[name] = user_module - - return user_module - - -def show_syntax(args, ad): - """Show command line syntax corresponding to the provided arg dictionary - `ad`. - """ - print('Usage:\n {0} '.format(args[0]) + - ' '.join(['--{0}='.format(k) for k in ad.keys()])) - - -class Bunch(object): - """ Simple storage. 
- """ - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - - -def custom_filter_dict(filename): - filter_dict = {} - with open(filename, 'r') as f: - for line in f: - ind, name = line.split() - filter_dict[name.lower()] = Bunch(index=int(ind)-1) - - return filter_dict diff --git a/pyproject.toml b/pyproject.toml index e83a3d11..56035247 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ test = ["pytest", "pytest-xdist"] [tool.setuptools] packages = ["prospect", - "prospect.models", "prospect.sources", + "prospect.models", "prospect.sources", "prospect.data", "prospect.likelihood", "prospect.fitting", "prospect.io", "prospect.plotting", "prospect.utils"] diff --git a/tests/test_eline.py b/tests/test_eline.py index 14d9a53c..05197f69 100644 --- a/tests/test_eline.py +++ b/tests/test_eline.py @@ -6,9 +6,9 @@ from sedpy import observate from prospect import prospect_args -from prospect.utils.obsutils import fix_obs +from prospect.data import Photometry, Spectrum, from_oldstyle from prospect.models.templates import TemplateLibrary -from prospect.models.sedmodel import SpecModel, SedModel +from prospect.models.sedmodel import SpecModel from prospect.sources import CSPSpecBasis @@ -61,7 +61,7 @@ def test_nebline_phot_addition(): wavelength=np.linspace(3000, 9000, 1000), spectrum=np.ones(1000), unc=np.ones(1000)*0.1) - obs = fix_obs(obs) + obslist = from_oldstyle(obs) sps = CSPSpecBasis(zcontinuous=1) @@ -81,26 +81,28 @@ def test_nebline_phot_addition(): model_pars["nebemlineinspec"]["init"] = False m2 = SpecModel(model_pars) - _, p1, _ = m1.predict(m1.theta, obs, sps) - _, p2, _ = m2.predict(m2.theta, obs, sps) + (s1, p1), _ = m1.predict(m1.theta, obslist, sps) + (s2, p2), _ = m2.predict(m2.theta, obslist, sps) # make sure some of the lines were important p1n = m1.nebline_photometry(filts) - assert np.any(p1n / p1 > 0.05) + assert np.any(p1n / p1[1] > 0.05) # make sure you got the same answer assert np.all(np.abs(p1 - p2) / p1 < 1e-3) def 
test_filtersets(): + """This test no longer relevant..... + """ fnames = [f"sdss_{b}0" for b in "ugriz"] flist = observate.load_filters(fnames) - fset = observate.FilterSet(fnames) obs = dict(wavelength=np.linspace(3000, 9000, 1000), spectrum=np.ones(1000), - unc=np.ones(1000)*0.1) - obs = fix_obs(obs) + unc=np.ones(1000)*0.1, + filters=fnames) + obslist = from_oldstyle(obs) sps = CSPSpecBasis(zcontinuous=1) @@ -120,31 +122,18 @@ def test_filtersets(): model_pars["nebemlineinspec"]["init"] = False models.append(SpecModel(model_pars)) - # test old usage w/ SedModel (mags computed in sps object) - model_pars = TemplateLibrary["parametric_sfh"] - model_pars["zred"]["init"] = zred - model_pars.update(TemplateLibrary["nebular"]) - models.append(SedModel(model_pars)) - for i, model in enumerate(models): - obs["filters"] = flist - _, plist, _ = model.predict(model.theta, obs, sps) - obs["filters"] = fset - _, pset, _ = model.predict(model.theta, obs, sps) + (_, pset), _ = model.predict(model.theta, obslist, sps) # make sure some of the filters are affected by lines + # ( nebular flux > 10% of total flux) if i == 1: nebphot = model.nebline_photometry(flist) assert np.any(nebphot / pset > 0.1) - dmag = np.abs(pset - plist) / plist - #print(plist) - #print(dmag) - # make sure photometry is consistent - assert np.all(dmag < 5e-2), f"photometry inconsistent between Filter list and FilterSet on model {i}: {dmag}" # make sure we actually used different filter types - assert np.any(dmag > 0) + # We always use filtersets now def test_eline_implementation(): @@ -158,7 +147,7 @@ def test_eline_implementation(): unc=np.ones(1000)*0.1, maggies=np.ones(len(filters))*1e-7, maggies_unc=np.ones(len(filters))*1e-8) - obs = fix_obs(obs) + obslist = from_oldstyle(obs) model_pars = TemplateLibrary["parametric_sfh"] model_pars.update(TemplateLibrary["nebular"]) @@ -170,21 +159,20 @@ def test_eline_implementation(): sps = CSPSpecBasis(zcontinuous=1) # generate with all fixed lines added - spec, 
phot, mfrac = model.predict(model.theta, obs=obs, sps=sps) + (spec, phot), mfrac = model.predict(model.theta, obslist, sps=sps) # test ignoring a line lya = "Ly-alpha 1215" model_pars["elines_to_ignore"] = dict(init=lya, isfree=False) model = SpecModel(model_pars) - spec_nolya, phot_nolya, mfrac = model.predict(model.theta, obs=obs, sps=sps) + (spec_nolya, phot_nolya), mfrac = model.predict(model.theta, obslist, sps=sps) assert np.any((phot - phot_nolya) / phot != 0.0) lint = np.trapz(spec - spec_nolya, obs["wavelength"]) assert lint > 0 # test igoring a line, phot only - obs_spec = obs.pop("spectrum") model = SpecModel(model_pars) - spec_nolya_2, phot_nolya_2, mfrac = model.predict(model.theta, obs=obs, sps=sps) + (phot_nolya_2,), mfrac = model.predict(model.theta, [obslist[1]], sps=sps) obs["spectrum"] = obs_spec assert np.all(phot_nolya == phot_nolya_2) diff --git a/tests/test_predict.py b/tests/test_predict.py index 70c1e61a..48f36a3a 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -7,7 +7,7 @@ from sedpy.observate import load_filters from prospect.sources import CSPSpecBasis from prospect.models import SpecModel, templates -from prospect.utils.observation import Spectrum, Photometry +from prospect.data import Spectrum, Photometry def build_model(add_neb=False): @@ -44,7 +44,7 @@ def build_sps(): return sps -if __name__ == "__main__": +def test_multispec(): obslist_single = build_obs(multispec=False) obslist = build_obs() model = build_model(add_neb=True) @@ -53,22 +53,30 @@ def build_sps(): predictions_single, mfrac = model.predict(model.theta, observations=obslist_single, sps=sps) predictions, mfrac = model.predict(model.theta, observations=obslist, sps=sps) - import matplotlib.pyplot as pl - fig, ax = pl.subplots() - ax.plot(obslist_single[0].wavelength, predictions_single[0]) - for p, o in zip(predictions, obslist): - if o.kind == "photometry": - ax.plot(o.wavelength, p, "o") - else: - ax.plot(o.wavelength, p) - - # -- TESting --- - 
observations = obslist - arr = np.zeros(model.ndim) + assert len(predictions_single) == 2 + assert len(predictions) == 3 + assert np.allclose(predictions_single[-1], predictions[-1]) + # TODO: turn this plot into an actual test + #import matplotlib.pyplot as pl + #fig, ax = pl.subplots() + #ax.plot(obslist_single[0].wavelength, predictions_single[0]) + #for p, o in zip(predictions, obslist): + # if o.kind == "photometry": + # ax.plot(o.wavelength, p, "o") + # else: + # ax.plot(o.wavelength, p) + + +def lnlike_testing(): + + # testing lnprobfn + observations = build_obs() + model = build_model(add_neb=True) from prospect.likelihood.likelihood import compute_lnlike from prospect.fitting import lnprobfn - sys.exit() + lnp = lnprobfn(model.theta, model=model, observations=obslist, sps=sps) + #%timeit model.prior_product(model.theta) #%timeit predictions, x = model.predict(model.theta + np.random.uniform(0, 3) * arr, observations=obslist, sps=sps) #%timeit lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs in zip(predictions, observations)] From 56781a17bf48086d066df8688d75750e3294b2c1 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 2 May 2022 16:08:25 -0400 Subject: [PATCH 06/33] Update docs for new observation object, including noise model description. 
--- doc/dataformat.rst | 166 ++++++++++++++++++----------- doc/faq.rst | 2 +- doc/index.rst | 1 + doc/models.rst | 4 +- doc/noise.rst | 58 ++++++++++ doc/quickstart.rst | 38 +++---- doc/usage.rst | 10 +- prospect/data/observation.py | 3 +- prospect/likelihood/__init__.py | 2 +- prospect/likelihood/noise_model.py | 16 +-- tests/test_eline.py | 3 +- 11 files changed, 200 insertions(+), 103 deletions(-) create mode 100644 doc/noise.rst diff --git a/doc/dataformat.rst b/doc/dataformat.rst index ab5ac7be..bbbacfaf 100644 --- a/doc/dataformat.rst +++ b/doc/dataformat.rst @@ -1,84 +1,116 @@ Data Formats ============ -The ``obs`` Dictionary & Data Units + +The `Observation` class ----------------------------------- -|Codename| expects the data in the form of a dictionary, preferably returned by -a :py:meth:`build_obs` function (see below). This dictionary should have (at -least) the following keys and values: - -``"wavelength"`` - The wavelength vector for the spectrum, ndarray. - Units are vacuum Angstroms. - The model spectrum will be computed for each element of this vector. - Set to ``None`` if you have no spectrum. - If fitting observed frame photometry as well, - then these should be observed frame wavelengths. - -``"spectrum"`` - The flux vector for the spectrum, - ndarray of same length as the wavelength vector. - If absolute spectrophotometry is available, - the units of this spectrum should be Janskies divided by 3631 (i.e. maggies). - Also the ``rescale_spectrum`` run parameter should be False. - -``"unc"`` - The uncertainty vector (sigma), in same units as ``"spectrum"``, - ndarray of same length as the wavelength vector. - -``"mask"`` - A boolean array of same length as the wavelength vector, - where ``False`` elements are ignored in the likelihood calculation. - -``"filters"`` - A sequence of `sedpy `_ filter objects or filter names, - used to calculate model magnitudes. - -``"maggies"`` - An array of photometric flux densities, same length as ``"filters"``. 
The - units are *maggies*. Maggies are a linear flux density unit defined as - :math:`{\rm maggie} = 10^{-0.4 \, m_{AB}}` where :math:`m_{AB}` is the AB apparent - magnitude. That is, 1 maggie is the flux density in Janskys divided by 3631. - Set to ``None`` if you have no photometric data. - -``"maggies_unc"`` - An array of photometric flux uncertainties, same length as ``"filters"``, - that gives the photometric uncertainties in units of *maggies* - -``"phot_mask"`` - Like ``"mask"``, a boolean array, used to mask the - photometric data during the likelihood calculation. - Elements with ``False`` values are ignored in the likelihood calculation. - -If you do not have spectral or photometric data, you can set ``"wavelength": -None`` or ``"maggies": None`` respectively. Feel free to add keys that store -other metadata, these will be stored on output. However, for ease of storage -these keys should either be numpy arrays or basic python datatypes that are JSON -serializable (e.g. strings, ints, and floats and lists, dicts, and tuples -thereof.) - -The method :py:meth:`prospect.utils.obsutils.fix_obs` can be used as a shortcut -to add any of the missing required keys with their default values and ensure -that there is data to fit, e.g. +|Codename| expects the data in the form of a list of ``Observations``, preferably +returned by :py:meth:`build_obs` (see below). Each Observation instance +corresponds to a single dataset, and is basically a namespace that also supports +dict-like accessing of important attributes. In addition to holding data and +uncertainties thereon, they tell prospector what data to predict, contain +dataset-specific information for how to predict that data, and can even store +methods for computing likelihoods in the case of complicated, dataset-specific +noise models. There are two fundamental kinds of data, `Photometry` and +`Spectrum` that are each subclasses of `Observation`.
They have the following +attributes, most of which can also be accessed as dictionary keys. + + +- ``wavelength`` + The wavelength vector for a `Spectrum` or the effective wavelengths of the + filters in a `Photometry` data set, ndarray. Units are vacuum Angstroms. + Generally these should be observed frame wavelengths. + +- ``flux`` + The flux vector for a `Spectrum`, or the broadband fluxes for `Photometry`, + ndarray of same length as the wavelength vector. For `Photometry` the units + are *maggies*. Maggies are a linear flux density unit defined as + :math:`{\rm maggie} = 10^{-0.4 \, m_{AB}}` where :math:`m_{AB}` is the AB + apparent magnitude. That is, 1 maggie is the flux density in Janskys divided + by 3631. If absolute spectrophotometry is available, the units for a + `Spectrum` should also be maggies, otherwise photometry must be present and + a calibration vector must be supplied or fit. + +- ``uncertainty`` + The uncertainty vector (sigma), in same units as ``flux``, ndarray of same + length as the wavelength vector. + +- ``mask`` + A boolean array of same length as the wavelength vector, where ``False`` + elements are ignored in the likelihood calculation. + +- ``filters`` + For a `Photometry`, this is a list of strings corresponding to filter names + in `sedpy `_ + + +In addition to these attributes, several additional aspects of an observation +are used to help predict data or to compute likelihoods. The latter is +particularly important in the case of complicated noise models, including outlier +models, jitter terms, or covariant noise. + +- ``name`` + A string that can be used to identify the dataset. This can be useful for + dataset-specific parameters. + +- ``resolution`` + For a `Spectrum` this defines the instrumental resolution. Analogously to + the ``filters`` attribute for `Photometry`, this knowledge is used to + accurately predict the model in the space of the data. + +- ``noise`` A :py:class:`NoiseModel` instance.
By default this implements a + simple chi-square calculation of independent noise, but it can be + complexified. + + +Example +------- + +For a single observation, you might do something like: + +.. code-block:: python + + def build_obs(N): + from prospect.data import Spectrum + # dummy observation with just a spectrum + N = 1000 + spec = Spectrum(wavelength=np.linspace(3000, 5000, N), flux=np.zeros(N), uncertainty=np.ones(N)) + # ensure that this is a valid observation for fitting (rectify works in place) + spec.rectify() + observations = [spec] + + return observations + +Note that `build_obs` returns a *list* even if there is only one dataset. + + +Converting from old style obs dictionaries +------------------------------------------ + +A tool exists to convert old combined observation dictionaries to a list of +`Observation` instances: .. code-block:: python - from prospect.utils.obsutils import fix_obs + from prospect.data import from_oldstyle # dummy observation dictionary with just a spectrum N = 1000 - obs = dict(wavelength=np.linspace(3000, 5000, N), spectrum=np.zeros(N), unc=np.ones(N)) - obs = fix_obs(obs) - assert "mask" in obs.keys() + obs = dict(wavelength=np.linspace(3000, 5000, N), spectrum=np.zeros(N), unc=np.ones(N), + filters=[f"sdss_{b}0" for b in "ugriz"], maggies=np.zeros(5), maggies_unc=np.ones(5)) + # convert the dictionary to a list of Observation instances + spec, phot = from_oldstyle(obs) + print(spec.ndata, phot.filternames, phot.wavelength, phot.flux) + -It is recommended to use this method at the end of any `build_obs` function. The :py:meth:`build_obs` function --------------------------------- The :py:meth:`build_obs` function in the parameter file is written by the user. It should take a dictionary of command line arguments as keyword arguments. It -should return an ``obs`` dictionary described above. +should return a list of :py:class:`prospect.data.Observation` instances, +described above. Other than that, the contents can be anything.
Within this function you might open and read FITS files, ascii tables, HDF5 files, or query SQL databases. You @@ -90,7 +122,11 @@ The point of this function is that you don't have to *externally* convert your data format to be what |Codename| expects and keep another version of files lying around: the conversion happens *within* the code itself. Again, the only requirement is that the function can take a ``run_params`` dictionary as keyword -arguments and that it return an ``obs`` dictionary as described below. +arguments and that it return a list of :py:class:`prospect.data.Observation` +instances, as described above. Each observation instance should correspond to a +particular dataset (e.g. a broadband photometric SED, the spectrum from a +particular instrument, or the spectrum from a particular night) that shares +instrumental and, more importantly, calibration parameters. .. |Codename| replace:: Prospector diff --git a/doc/faq.rst b/doc/faq.rst index 47b50daa..3dbbfad8 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -56,7 +56,7 @@ There are several extra considerations that come up when fitting spectroscopy Prospector includes methods for FFT based smoothing of the spectra, assuming a gaussian LSF (in either wavelength or velocity space). There is also the possibility of FFT based smoothing for wavelength dependent - gaussian dispersion (i.e. sigma_lambda = f(lambda) with f possibly a + Gaussian dispersion (i.e. sigma_lambda = f(lambda) with f possibly a polynomial of lambda). In practice the smoothed spectra will be a combination of the library resolution plus whatever FFT smoothing is applied.
Hopefully this can be made to match your actual data resolution, diff --git a/doc/index.rst b/doc/index.rst index 376bfbc2..a6817275 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -33,6 +33,7 @@ Prospector allows you to: models sfhs nebular + noise output ref diff --git a/doc/models.rst b/doc/models.rst index 89c06f9f..4104b01e 100644 --- a/doc/models.rst +++ b/doc/models.rst @@ -190,8 +190,8 @@ inspect the free and fixed parameters in a given set, you can do something like # This dictionary can be updated or modified, to expand the model. model_params.update(TemplateLibrary["nebular"]) # Instantiate a model object - from prospect.models import SedModel - model = SedModel(model_params) + from prospect.models import SpecModel + model = SpecModel(model_params) The ``build_model()`` Method diff --git a/doc/noise.rst b/doc/noise.rst new file mode 100644 index 00000000..5e0ac575 --- /dev/null +++ b/doc/noise.rst @@ -0,0 +1,58 @@ +Noise Modeling +============== + +The noise model for each dataset is used to define the likelihood function, +given the observed data and the model prediction. As such, each dataset or +``Observation`` is assigned its own noise model. By default this is the basic +:math:`\chi^2` noise model. Complications are described below. + + +Outliers +-------- + +For outlier modeling we follow `hogg10 `_ + +The key parameters of this noise model are the fraction of datapoints in a given +dataset that are outliers, and the typical variance of the outliers. Each +dataset might have different outlier parameters, and so we need to find a way +to identify which outlier model parameter belongs with which dataset. This can +be done when the noise model is assigned to a dataset.
For example, if we had a +single photometric dataset and a single spectroscopic dataset, with outlier +model parameters for each given by ``("f_outlier_phot", "nsigma_outlier_phot")`` +and ``("f_outlier_spec", "nsigma_outlier_spec")`` respectively (this is the +default parameter set available as a template) then we could associate these +parameters with each dataset as follows: + +.. code-block:: python + + from prospect.data import Photometry, Spectrum + from prospect.likelihood import NoiseModel + filternames = [f"sdss_{b}0" for b in "ugriz"] + N = len(filternames) + pdat = Photometry(filters=filternames, flux=np.zeros(N), uncertainty=np.ones(N), + noise=NoiseModel(frac_out_name="f_outlier_phot", + nsigma_out_name="nsigma_outlier_phot")) + N = 1000 + sdat = Spectrum(wavelength=np.linspace(4e3, 7e3, N), flux=np.zeros(N), uncertainty=np.ones(N), + noise=NoiseModel(frac_out_name="f_outlier_spec", + nsigma_out_name="nsigma_outlier_spec")) + +This can be combined with other Noise models, as long as they have diagonal +(1-dimensional) covariance matrices. + + +Jitter +------ + + + +Correlated Noise +---------------- + + +KDE Noise +--------- + + + +.. |Codename| replace:: Prospector diff --git a/doc/quickstart.rst b/doc/quickstart.rst index 773d6b2d..2181b976 100644 --- a/doc/quickstart.rst +++ b/doc/quickstart.rst @@ -35,25 +35,32 @@ get spectral data so we know the redshift. shdus = SDSS.get_spectra(plate=2101, mjd=53858, fiberID=220)[0] assert int(shdus[2].data["SpecObjID"][0]) == cat[0]["specObjID"] -Now we will put this data in a dictionary with format expected by prospector. We -convert the magnitudes to maggies, convert the magnitude errors to flux -uncertainties (including a noise floor), and load the filter transmission curves -using `sedpy`. We'll store the redshift here as well for convenience. Note that -for this example we do *not* attempt to fit the spectrum at the same time. +Now we will put this data in the format expected by prospector.
We convert the +magnitudes to maggies, convert the magnitude errors to flux uncertainties +(including a noise floor), and load the filter transmission curves using +`sedpy`. We'll store the redshift here as well for convenience. Note that for +this example we do *not* attempt to fit the spectrum at the same time, though we +include an empty Spectrum data set to force a prediction of the full spectrum. .. code:: python from sedpy.observate import load_filters - from prospect.utils.obsutils import fix_obs + from prospect.data import Photometry, Spectrum filters = load_filters([f"sdss_{b}0" for b in bands]) maggies = np.array([10**(-0.4 * cat[0][f"cModelMag_{b}"]) for b in bands]) magerr = np.array([cat[0][f"cModelMagErr_{b}"] for b in bands]) magerr = np.clip(magerr, 0.05, np.inf) - obs = dict(wavelength=None, spectrum=None, unc=None, redshift=shdus[2].data[0]["z"], - maggies=maggies, maggies_unc=magerr * maggies / 1.086, filters=filters) - obs = fix_obs(obs) + pdat = Photometry(filters=filters, flux=maggies, uncertainty=magerr*maggies/1.086) + sdat = Spectrum(wavelength=None, flux=None, uncertainty=None) + observations = [sdat, pdat] + for obs in observations: + obs.redshift = shdus[2].data[0]["z"] + +In principle we could also add noise models for the spectral and photometric +data (e.g. to fit for the photometric noise floor), but we'll make the default +assumption of iid Gaussian noise for the moment. Build a Model @@ -77,13 +84,6 @@ should be replaced or adjusted depending on your particular science question. assert len(model.free_params) == 5 print(model) -In principle we could also add noise models for the spectral and photometric -data, but we'll make the default assumption of independent Gaussian noise for the moment. - -.. code:: python - - noise_model = (None, None) - Get a 'Source' -------------- @@ -112,7 +112,7 @@ the free parameters. 
current_parameters = ",".join([f"{p}={v}" for p, v in zip(model.free_params, model.theta)]) print(current_parameters) - spec, phot, mfrac = model.predict(model.theta, obs=obs, sps=sps) + (spec, phot), mfrac = model.predict(model.theta, observations, sps=sps) print(phot / obs["maggies"]) @@ -128,8 +128,8 @@ minutes. .. code:: python from prospect.fitting import lnprobfn, fit_model - fitting_kwargs = dict(nlive_init=400, nested_method="rwalk", nested_target_n_effective=1000, nested_dlogz_init=0.05) - output = fit_model(obs, model, sps, optimize=False, dynesty=True, lnprobfn=lnprobfn, noise=noise_model, **fitting_kwargs) + fitting_kwargs = dict(nlive_init=400, nested_method="rwalk", nested_target_n_effective=10000, nested_dlogz_init=0.05) + output = fit_model(obs, model, sps, optimize=False, dynesty=True, lnprobfn=lnprobfn, **fitting_kwargs) result, duration = output["sampling"] The ``result`` is a dictionary with keys giving the Monte Carlo samples of diff --git a/doc/usage.rst b/doc/usage.rst index ed52092e..feb9b7c9 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -100,7 +100,7 @@ The required methods in a **parameter file** for building the data and model are 1. :py:meth:`build_obs`: This function will take the command line arguments dictionary as keyword arguments - and returns on obs dictionary (see :doc:`dataformat` .) + and returns a list of `Observation` instances (see :doc:`dataformat` .) 2. :py:meth:`build_model`: This function will take the command line arguments dictionary dictionary as keyword arguments @@ -115,9 +115,11 @@ The required methods in a **parameter file** for building the data and model are building code and as such has a large memory footprint. 4. :py:meth:`build_noise`: - This function should return a :py:class:`NoiseModel` object for the spectroscopy and/or - photometry. Either or both can be ``None`` (the default) in which case the likelihood - will not include covariant noise or jitter and is equivalent to basic :math:`\chi^2`. 
+ This function, if present, should add a :py:class:`NoiseModel` object to the + spectroscopy and/or photometry. If not present the likelihood will not + include covariant noise or jitter and is equivalent to basic :math:`\chi^2`. + + Using MPI --------- diff --git a/prospect/data/observation.py b/prospect/data/observation.py index 85936802..9b9e053d 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -94,7 +94,7 @@ def rectify(self, for_fitting=False): (np.isfinite(self.uncertainty)) & (self.uncertainty > 0)) - assert self.ndof == 0, f"{self.__repr__()} has no valid data to fit: check the sign of the masks." + assert self.ndof > 0, f"{self.__repr__()} has no valid data to fit: check the sign of the masks." assert hasattr(self, "noise") def render(self, wavelength, spectrum): @@ -116,6 +116,7 @@ def ndata(self): def serialize(self): obs = vars(self) serial = json.dumps(obs, cls=NumpyEncoder) + return serial class Photometry(Observation): diff --git a/prospect/likelihood/__init__.py b/prospect/likelihood/__init__.py index e29b349e..5cf69c3a 100644 --- a/prospect/likelihood/__init__.py +++ b/prospect/likelihood/__init__.py @@ -1,5 +1,5 @@ from .likelihood import * from .noise_model import * -__all__ = ["lnlike_spec", "lnlike_phot", "NoiseModel", "NoiseModelKDE"] +__all__ = ["lnlike_spec", "lnlike_phot", "NoiseModel", "NoiseModelCov", "NoiseModelKDE"] diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index 1f6b18de..5683cb21 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -19,14 +19,14 @@ class NoiseModel: f_outlier = 0 n_sigma_outlier = 50 - def __init__(self, f_outlier_name="f_outlier", n_sigma_name="nsigma_outlier"): - self.f_outlier_name = f_outlier_name - self.n_sigma_name = n_sigma_name + def __init__(self, frac_out_name="f_outlier", nsigma_out_name="nsigma_outlier"): + self.frac_out_name = frac_out_name + self.nsigma_out_name = nsigma_out_name self.kernels 
= [] def update(self, **params): - self.f_outlier = params.get(self.f_outlier_name, 0) - self.n_sigma_outlier = params.get(self.n_sigma_name, 50) + self.f_outlier = params.get(self.frac_out_name, 0) + self.n_sigma_outlier = params.get(self.nsigma_out_name, 50) [k.update(**params) for k in self.kernels] def lnlike(self, pred, obs, vectors={}): @@ -84,11 +84,11 @@ class NoiseModelCov(NoiseModel): """This object allows for 1d or 2d covariance matrices constructed from kernels """ - def __init__(self, f_outlier_name="f_outlier", n_sigma_name="nsigma_outlier", + def __init__(self, frac_out_name="f_outlier", nsigma_out_name="nsigma_outlier", metric_name='', mask_name='mask', kernels=[], weight_by=[]): - super().__init__(f_outlier_name=f_outlier_name, - n_sigma_name=n_sigma_name) + super().__init__(frac_out_name=frac_out_name, + nsigma_out_name=nsigma_out_name) assert len(kernels) == len(weight_by) self.kernels = kernels self.weight_names = weight_by diff --git a/tests/test_eline.py b/tests/test_eline.py index 05197f69..a25f1285 100644 --- a/tests/test_eline.py +++ b/tests/test_eline.py @@ -167,13 +167,12 @@ def test_eline_implementation(): model = SpecModel(model_pars) (spec_nolya, phot_nolya), mfrac = model.predict(model.theta, obslist, sps=sps) assert np.any((phot - phot_nolya) / phot != 0.0) - lint = np.trapz(spec - spec_nolya, obs["wavelength"]) + lint = np.trapz(spec - spec_nolya, obslist[0]["wavelength"]) assert lint > 0 # test igoring a line, phot only model = SpecModel(model_pars) (phot_nolya_2,), mfrac = model.predict(model.theta, [obslist[1]], sps=sps) - obs["spectrum"] = obs_spec assert np.all(phot_nolya == phot_nolya_2) #import matplotlib.pyplot as pl From 115b13253e9a04c5a49cee3c23f3beaaa9d21c94 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 3 May 2022 10:03:38 -0400 Subject: [PATCH 07/33] Remove utils.smoothing, switch to using astro-sedpy for smoothing. 
--- doc/requirements.txt | 2 +- prospect/models/sedmodel.py | 2 +- prospect/plotting/sed.py | 2 +- prospect/sources/galaxy_basis.py | 2 +- prospect/sources/ssp_basis.py | 2 +- prospect/sources/star_basis.py | 2 +- prospect/utils/smoothing.py | 669 ------------------------------- tests/tests_smoothing.py | 2 +- 8 files changed, 7 insertions(+), 676 deletions(-) delete mode 100644 prospect/utils/smoothing.py diff --git a/doc/requirements.txt b/doc/requirements.txt index a2d257eb..ff97cf28 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -3,7 +3,7 @@ scipy >= 1.1.0 matplotlib >= 3.0 astropy h5py -astro-sedpy +astro-sedpy >= 0.3.0 sphinx-book-theme myst-nb numpydoc \ No newline at end of file diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 94d9aba6..98525e45 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -13,11 +13,11 @@ from scipy.stats import multivariate_normal as mvn from sedpy.observate import getSED +from sedpy.smoothing import smoothspec from .parameters import ProspectorParams from ..sources.constants import to_cgs_at_10pc as to_cgs from ..sources.constants import cosmo, lightspeed, ckms, jansky_cgs -from ..utils.smoothing import smoothspec __all__ = ["SpecModel", "PolySpecModel", "SplineSpecModel", diff --git a/prospect/plotting/sed.py b/prospect/plotting/sed.py index c0625bab..6a3c12de 100644 --- a/prospect/plotting/sed.py +++ b/prospect/plotting/sed.py @@ -3,7 +3,7 @@ import numpy as np -from ..utils.smoothing import smoothspec +from sedpy.smoothing import smoothspec __all__ = ["convolve_spec", "to_nufnu"] diff --git a/prospect/sources/galaxy_basis.py b/prospect/sources/galaxy_basis.py index 6333f2ca..f767d150 100644 --- a/prospect/sources/galaxy_basis.py +++ b/prospect/sources/galaxy_basis.py @@ -2,8 +2,8 @@ import numpy as np from copy import deepcopy +from sedpy.smoothing import smoothspec from .ssp_basis import SSPBasis -from ..utils.smoothing import smoothspec from .constants 
import cosmo, lightspeed, jansky_cgs, to_cgs_at_10pc try: diff --git a/prospect/sources/ssp_basis.py b/prospect/sources/ssp_basis.py index 156a2b80..c4b17411 100644 --- a/prospect/sources/ssp_basis.py +++ b/prospect/sources/ssp_basis.py @@ -2,7 +2,7 @@ import numpy as np from numpy.polynomial.chebyshev import chebval -from ..utils.smoothing import smoothspec +from sedpy.smoothing import smoothspec from .constants import cosmo, lightspeed, jansky_cgs, to_cgs_at_10pc try: diff --git a/prospect/sources/star_basis.py b/prospect/sources/star_basis.py index abe733ad..7f532d61 100644 --- a/prospect/sources/star_basis.py +++ b/prospect/sources/star_basis.py @@ -3,7 +3,7 @@ from numpy.polynomial.chebyshev import chebval from scipy.spatial import Delaunay -from ..utils.smoothing import smoothspec +from sedpy.smoothing import smoothspec from .constants import lightspeed, lsun, jansky_cgs, to_cgs_at_10pc try: diff --git a/prospect/utils/smoothing.py b/prospect/utils/smoothing.py deleted file mode 100644 index dc90ef58..00000000 --- a/prospect/utils/smoothing.py +++ /dev/null @@ -1,669 +0,0 @@ -# Spectral smoothing functionality -# To do: -# 3) add extra zero-padding for FFT algorithms so they don't go funky at the -# edges? - -import numpy as np -from numpy.fft import fft, ifft, fftfreq, rfftfreq - -__all__ = ["smoothspec", "smooth_wave", "smooth_vel", "smooth_lsf", - "smooth_wave_fft", "smooth_vel_fft", "smooth_fft", "smooth_lsf_fft", - "mask_wave", "resample_wave"] - -ckms = 2.998e5 -sigma_to_fwhm = 2.355 - - -def smoothspec(wave, spec, resolution=None, outwave=None, - smoothtype="vel", fftsmooth=True, - min_wave_smooth=0, max_wave_smooth=np.inf, **kwargs): - """ - Parameters - ---------- - wave : ndarray of shape ``(N_pix,)`` - The wavelength vector of the input spectrum. Assumed Angstroms. - - spec : ndarray of shape ``(N_pix,)`` - The flux vector of the input spectrum. - - resolution : float - The smoothing parameter. Units depend on ``smoothtype``. 
- - outwave : ``None`` or ndarray of shape ``(N_pix_out,)`` - The output wavelength vector. If ``None`` then the input wavelength - vector will be assumed, though if ``min_wave_smooth`` or - ``max_wave_smooth`` are also specified, then the output spectrum may - have different length than ``spec`` or ``wave``, or the convolution may - be strange outside of ``min_wave_smooth`` and ``max_wave_smooth``. - Basically, always set ``outwave`` to be safe. - - smoothtype : string, optional, default: "vel" - The type of smoothing to perform. One of: - - + ``"vel"`` - velocity smoothing, ``resolution`` units are in km/s - (dispersion not FWHM) - + ``"R"`` - resolution smoothing, ``resolution`` is in units of - :math:`\lambda/ \sigma_\lambda` (where :math:`\sigma_\lambda` is - dispersion, not FWHM) - + ``"lambda"`` - wavelength smoothing. ``resolution`` is in units of - Angstroms - + ``"lsf"`` - line-spread function. Use an aribitrary line spread - function, which can be given as a vector the same length as ``wave`` - that gives the dispersion (in AA) at each wavelength. Alternatively, - if ``resolution`` is ``None`` then a line-spread function must be - present as an additional ``lsf`` keyword. In this case all additional - keywords as well as the ``wave`` vector will be passed to this ``lsf`` - function. - - fftsmooth : bool, optional, default: True - Switch to use FFTs to do the smoothing, usually resulting in massive - speedups of all algorithms. However, edge effects may be present. - - min_wave_smooth : float, optional default: 0 - The minimum wavelength of the input vector to consider when smoothing - the spectrum. If ``None`` then it is determined from the output - wavelength vector and padded by some multiple of the desired resolution. - - max_wave_smooth : float, optional, default: inf - The maximum wavelength of the input vector to consider when smoothing - the spectrum. 
If None then it is determined from the output wavelength - vector and padded by some multiple of the desired resolution. - - inres : float, optional - If given, this parameter specifies the resolution of the input. This - resolution is subtracted in quadrature from the target output resolution - before the kernel is formed. - - In certain cases this can be used to properly switch from resolution - that is constant in velocity to one that is constant in wavelength, - taking into account the wavelength dependence of the input resolution - when defined in terms of lambda. This is possible iff: - * ``fftsmooth`` is False - * ``smoothtype`` is ``"lambda"`` - * The optional ``in_vel`` parameter is supplied and True. - - The units of ``inres`` should be the same as the units of - ``resolution``, except in the case of switching from velocity to - wavelength resolution, in which case the units of ``inres`` should be - in units of lambda/sigma_lambda. - - in_vel : float (optional) - If supplied and True, the ``inres`` parameter is assumed to be in units - of lambda/sigma_lambda. This parameter is ignored **unless** the - ``smoothtype`` is ``"lambda"`` and ``fftsmooth`` is False. - - Returns - ------- - flux : ndarray of shape ``(N_pix_out,)`` - The smoothed spectrum on the `outwave` grid, ndarray. 
- """ - if smoothtype == 'vel': - linear = False - units = 'km/s' - sigma = resolution - fwhm = sigma * sigma_to_fwhm - if sigma == 0.0: - Rsigma = np.infty - R = np.infty - else: - Rsigma = ckms / sigma - R = ckms / fwhm - width = Rsigma - assert np.size(sigma) == 1, "`resolution` must be scalar for `smoothtype`='vel'" - - elif smoothtype == 'R': - linear = False - units = 'km/s' - Rsigma = resolution - sigma = ckms / Rsigma - fwhm = sigma * sigma_to_fwhm - R = ckms / fwhm - width = Rsigma - assert np.size(sigma) == 1, "`resolution` must be scalar for `smoothtype`='R'" - # convert inres from Rsigma to sigma (km/s) - try: - kwargs['inres'] = ckms / kwargs['inres'] - except(KeyError): - pass - - elif smoothtype == 'lambda': - linear = True - units = 'AA' - sigma = resolution - fwhm = sigma * sigma_to_fwhm - Rsigma = None - R = None - width = sigma - assert np.size(sigma) == 1, "`resolution` must be scalar for `smoothtype`='lambda'" - - elif smoothtype == 'lsf': - linear = True - width = 100 - sigma = resolution - - else: - raise ValueError("smoothtype {} is not valid".format(smoothtype)) - - # Mask the input spectrum depending on outwave or the wave_smooth kwargs - mask = mask_wave(wave, width=width, outwave=outwave, linear=linear, - wlo=min_wave_smooth, whi=max_wave_smooth, **kwargs) - w = wave[mask] - s = spec[mask] - if outwave is None: - outwave = wave - - # Choose the smoothing method - if smoothtype == 'lsf': - if fftsmooth: - smooth_method = smooth_lsf_fft - if sigma is not None: - # mask the resolution vector - sigma = resolution[mask] - else: - smooth_method = smooth_lsf - if sigma is not None: - # convert to resolution on the output wavelength grid - sigma = np.interp(outwave, wave, resolution) - elif linear: - if fftsmooth: - smooth_method = smooth_wave_fft - else: - smooth_method = smooth_wave - else: - if fftsmooth: - smooth_method = smooth_vel_fft - else: - smooth_method = smooth_vel - - # Actually do the smoothing and return - return smooth_method(w, 
s, outwave, sigma, **kwargs) - - -def smooth_vel(wave, spec, outwave, sigma, nsigma=10, inres=0, **extras): - """Smooth a spectrum in velocity space. This is insanely slow, but general - and correct. - - :param wave: - Wavelength vector of the input spectrum. - - :param spec: - Flux vector of the input spectrum. - - :param outwave: - Desired output wavelength vector. - - :param sigma: - Desired velocity resolution (km/s), *not* FWHM. - - :param nsigma: - Number of sigma away from the output wavelength to consider in the - integral. If less than zero, all wavelengths are used. Setting this - to some positive number decreses the scaling constant in the O(N_out * - N_in) algorithm used here. - - :param inres: - The velocity resolution of the input spectrum (km/s), *not* FWHM. - """ - sigma_eff_sq = sigma**2 - inres**2 - if np.any(sigma_eff_sq) < 0.0: - raise ValueError("Desired velocity resolution smaller than the value" - "possible for this input spectrum.".format(inres)) - # sigma_eff is in units of sigma_lambda / lambda - sigma_eff = np.sqrt(sigma_eff_sq) / ckms - - lnwave = np.log(wave) - flux = np.zeros(len(outwave)) - for i, w in enumerate(outwave): - x = (np.log(w) - lnwave) / sigma_eff - if nsigma > 0: - good = np.abs(x) < nsigma - x = x[good] - _spec = spec[good] - else: - _spec = spec - f = np.exp(-0.5 * x**2) - flux[i] = np.trapz(f * _spec, x) / np.trapz(f, x) - return flux - - -def smooth_vel_fft(wavelength, spectrum, outwave, sigma_out, inres=0.0, - **extras): - """Smooth a spectrum in velocity space, using FFTs. This is fast, but makes - some assumptions about the form of the input spectrum and can have some - issues at the ends of the spectrum depending on how it is padded. - - :param wavelength: - Wavelength vector of the input spectrum. An assertion error will result - if this is not a regular grid in wavelength. - - :param spectrum: - Flux vector of the input spectrum. - - :param outwave: - Desired output wavelength vector. 
- - :param sigma_out: - Desired velocity resolution (km/s), *not* FWHM. Scalar or length 1 array. - - :param inres: - The velocity resolution of the input spectrum (km/s), dispersion *not* - FWHM. - """ - # The kernel width for the convolution. - sigma = np.sqrt(sigma_out**2 - inres**2) - if sigma <= 0: - return np.interp(outwave, wavelength, spectrum) - - # make length of spectrum a power of 2 by resampling - wave, spec = resample_wave(wavelength, spectrum) - - # get grid resolution (*not* the resolution of the input spectrum) and make - # sure it's nearly constant. It should be, by design (see resample_wave) - invRgrid = np.diff(np.log(wave)) - assert invRgrid.max() / invRgrid.min() < 1.05 - dv = ckms * np.median(invRgrid) - - # Do the convolution - spec_conv = smooth_fft(dv, spec, sigma) - # interpolate onto output grid - if outwave is not None: - spec_conv = np.interp(outwave, wave, spec_conv) - - return spec_conv - - -def smooth_wave(wave, spec, outwave, sigma, nsigma=10, inres=0, in_vel=False, - **extras): - """Smooth a spectrum in wavelength space. This is insanely slow, but - general and correct (except for the treatment of the input resolution if it - is velocity) - - :param wave: - Wavelength vector of the input spectrum. - - :param spec: - Flux vector of the input spectrum. - - :param outwave: - Desired output wavelength vector. - - :param sigma: - Desired resolution (*not* FWHM) in wavelength units. This can be a - vector of same length as ``wave``, in which case a wavelength dependent - broadening is calculated - - :param nsigma: (optional, default=10) - Number of sigma away from the output wavelength to consider in the - integral. If less than zero, all wavelengths are used. Setting this - to some positive number decreses the scaling constant in the O(N_out * - N_in) algorithm used here. - - :param inres: (optional, default: 0.0) - Resolution of the input, in either wavelength units or - lambda/dlambda (c/v). Ignored if <= 0. 
- - :param in_vel: (optional, default: False) - If True, the input spectrum has been smoothed in velocity - space, and ``inres`` is assumed to be in lambda/dlambda. - - :returns flux: - The output smoothed flux vector, same length as ``outwave``. - """ - # sigma_eff is in angstroms - if inres <= 0: - sigma_eff_sq = sigma**2 - elif in_vel: - # Make an approximate correction for the intrinsic wavelength - # dependent dispersion. This sort of maybe works. - sigma_eff_sq = sigma**2 - (wave / inres)**2 - else: - sigma_eff_sq = sigma**2 - inres**2 - if np.any(sigma_eff_sq < 0): - raise ValueError("Desired wavelength sigma is lower than the value " - "possible for this input spectrum.") - - sigma_eff = np.sqrt(sigma_eff_sq) - flux = np.zeros(len(outwave)) - for i, w in enumerate(outwave): - x = (wave - w) / sigma_eff - if nsigma > 0: - good = np.abs(x) < nsigma - x = x[good] - _spec = spec[good] - else: - _spec = spec - f = np.exp(-0.5 * x**2) - flux[i] = np.trapz(f * _spec, x) / np.trapz(f, x) - return flux - - -def smooth_wave_fft(wavelength, spectrum, outwave, sigma_out=1.0, - inres=0.0, **extras): - """Smooth a spectrum in wavelength space, using FFTs. This is fast, but - makes some assumptions about the input spectrum, and can have some - issues at the ends of the spectrum depending on how it is padded. - - :param wavelength: - Wavelength vector of the input spectrum. - - :param spectrum: - Flux vector of the input spectrum. - - :param outwave: - Desired output wavelength vector. - - :param sigma: - Desired resolution (*not* FWHM) in wavelength units. - - :param inres: - Resolution of the input, in wavelength units (dispersion not FWHM). - - :returns flux: - The output smoothed flux vector, same length as ``outwave``. - """ - # restrict wavelength range (for speed) - # should also make nearest power of 2 - wave, spec = resample_wave(wavelength, spectrum, linear=True) - - # The kernel width for the convolution. 
- sigma = np.sqrt(sigma_out**2 - inres**2) - if sigma < 0: - return np.interp(wave, outwave, flux) - - # get grid resolution (*not* the resolution of the input spectrum) and make - # sure it's nearly constant. Should be by design (see resample_wave) - Rgrid = np.diff(wave) - assert Rgrid.max() / Rgrid.min() < 1.05 - dw = np.median(Rgrid) - - # Do the convolution - spec_conv = smooth_fft(dw, spec, sigma) - # interpolate onto output grid - if outwave is not None: - spec_conv = np.interp(outwave, wave, spec_conv) - return spec_conv - - -def smooth_lsf(wave, spec, outwave, sigma=None, lsf=None, return_kernel=False, - **kwargs): - """Broaden a spectrum using a wavelength dependent line spread function. - This function is only approximate because it doesn't actually do the - integration over pixels, so for sparsely sampled points you'll have - problems. This function needs to be checked and possibly rewritten. - - :param wave: - Input wavelengths. ndarray of shape (nin,) - - :param spec: - Input spectrum. ndarray of same shape as ``wave``. - - :param outwave: - Output wavelengths, ndarray of shape (nout,) - - :param sigma: (optional, default: None) - The dispersion (not FWHM) as a function of wavelength that you want to - apply to the input spectrum. ``None`` or ndarray of same length as - ``outwave``. If ``None`` then the wavelength dependent dispersion will be - calculated from the function supplied with the ``lsf`` keyward. - - :param lsf: - A function that returns the gaussian dispersion at each wavelength. - This is assumed to be in sigma, not FWHM. - - :param kwargs: - Passed to the function supplied in the ``lsf`` keyword. - - :param return_kernel: (optional, default: False) - If True, return the kernel used to broaden the spectrum as ndarray of - shape (nout, nin). - - :returns newspec: - The broadened spectrum, same length as ``outwave``. 
- """ - if (lsf is None) and (sigma is None): - return np.interp(outwave, wave, spec) - dw = np.gradient(wave) - if sigma is None: - sigma = lsf(outwave, **kwargs) - kernel = outwave[:, None] - wave[None, :] - kernel = (1 / (sigma * np.sqrt(np.pi * 2))[:, None] * - np.exp(-kernel**2 / (2 * sigma[:, None]**2)) * - dw[None, :]) - # should this be axis=0 or axis=1? - kernel = kernel / kernel.sum(axis=1)[:, None] - newspec = np.dot(kernel, spec) - # kernel /= np.trapz(kernel, wave, axis=1)[:, None] - # newspec = np.trapz(kernel * spec[None, :], wave, axis=1) - if return_kernel: - return newspec, kernel - return newspec - - -def smooth_lsf_fft(wave, spec, outwave, sigma=None, lsf=None, pix_per_sigma=2, - eps=0.25, preserve_all_input_frequencies=False, **kwargs): - """Smooth a spectrum by a wavelength dependent line-spread function, using - FFTs. - - :param wave: - Wavelength vector of the input spectrum. - - :param spectrum: - Flux vector of the input spectrum. - - :param outwave: - Desired output wavelength vector. - - :param sigma: (optional) - Dispersion (in same units as ``wave``) as a function `wave`. ndarray - of same length as ``wave``. If not given, sigma will be computed from - the function provided by the ``lsf`` keyword. - - :param lsf: (optional) - Function used to calculate the dispersion as a function of wavelength. - Must be able to take as an argument the ``wave`` vector and any extra - keyword arguments and return the dispersion (in the same units as the - input wavelength vector) at every value of ``wave``. If not provided - then ``sigma`` must be specified. - - :param pix_per_sigma: (optional, default: 2) - Number of pixels per sigma of the smoothed spectrum to use in - intermediate interpolation and FFT steps. Increasing this number will - increase the accuracy of the output (to a point), and the run-time, by - preserving all high-frequency information in the input spectrum. 
- - :param preserve_all_input_frequencies: (default: False) - This is a switch to use a very dense sampling of the input spectrum - that preserves all input frequencies. It can significantly increase - the call time for often modest gains... - - :param eps: (optional) - Deprecated. - - :param **kwargs: - All additional keywords are passed to the function supplied to the - ``lsf`` keyword, if present. - - :returns flux: - The input spectrum smoothed by the wavelength dependent line-spread - function. Same length as ``outwave``. - """ - # This is sigma vs lambda - if sigma is None: - sigma = lsf(wave, **kwargs) - - # Now we need the CDF of 1/sigma, which provides the relationship between x and lambda - # does dw go in numerator or denominator? - # I think numerator but should be tested - dw = np.gradient(wave) - cdf = np.cumsum(dw / sigma) - cdf /= cdf.max() - - # Now we create an evenly sampled grid in the x coordinate on the interval [0,1] - # and convert that to lambda using the cdf. - # This should result in some power of two x points, for FFT efficiency - - # Furthermore, the number of points should be high enough that the - # resolution is critically sampled. And we want to know what the - # resolution is in this new coordinate. 
- # There are two possible ways to do this - - # 1) Choose a point ~halfway in the spectrum - # half = len(wave) / 2 - # Now get the x coordinates of a point eps*sigma redder and bluer - # wave_eps = eps * np.array([-1, 1]) * sigma[halpha] - # x_h_eps = np.interp(wave[half] + wave_eps, wave, cdf) - # Take the differences to get dx and dsigma and ratio to get x per sigma - # x_per_sigma = np.diff(x_h_eps) / (2.0 * eps) #x_h_epsilon - x_h - - # 2) Get for all points (slower?): - sigma_per_pixel = (dw / sigma) - x_per_pixel = np.gradient(cdf) - x_per_sigma = np.nanmedian(x_per_pixel / sigma_per_pixel) - N = pix_per_sigma / x_per_sigma - - # Alternatively, just use the smallest dx of the input, divided by two for safety - # Assumes the input spectrum is critically sampled. - # And does not actually give x_per_sigma, so that has to be determined anyway - if preserve_all_input_frequencies: - # preserve more information in the input spectrum, even when way higher - # frequency than the resolution of the output. Leads to slightly more - # accurate output, but with a substantial time hit - N = max(N, 1.0 / np.nanmin(x_per_pixel)) - - # Now find the smallest power of two that divides the interval (0, 1) into - # segments that are smaller than dx - nx = int(2**np.ceil(np.log2(N))) - - # now evenly sample in the x coordinate - x = np.linspace(0, 1, nx) - dx = 1.0 / nx - - # And now we get the spectrum at the lambda coordinates of the even grid in x - lam = np.interp(x, cdf, wave) - newspec = np.interp(lam, wave, spec) - - # And now we convolve. - # If we did not know sigma in terms of x we could estimate it here - # from the resulting sigma(lamda(x)) / dlambda(x): - # dlam = np.gradient(lam) - # sigma_x = np.median(lsf(lam, **kwargs) / dlam) - # But the following just uses the fact that we know x_per_sigma (duh). - spec_conv = smooth_fft(dx, newspec, x_per_sigma) - - # and interpolate back to the output wavelength grid. 
- return np.interp(outwave, lam, spec_conv) - - -def smooth_fft(dx, spec, sigma): - """Basic math for FFT convolution with a gaussian kernel. - - :param dx: - The wavelength or velocity spacing, same units as sigma - - :param sigma: - The width of the gaussian kernel, same units as dx - - :param spec: - The spectrum flux vector - """ - # The Fourier coordinate - ss = rfftfreq(len(spec), d=dx) - # Make the fourier space taper; just the analytical fft of a gaussian - taper = np.exp(-2 * (np.pi ** 2) * (sigma ** 2) * (ss ** 2)) - ss[0] = 0.01 # hack - # Fourier transform the spectrum - spec_ff = np.fft.rfft(spec) - # Multiply in fourier space - ff_tapered = spec_ff * taper - # Fourier transform back - spec_conv = np.fft.irfft(ff_tapered) - return spec_conv - - -def mask_wave(wavelength, width=1, wlo=0, whi=np.inf, outwave=None, - nsigma_pad=20.0, linear=False, **extras): - """Restrict wavelength range (for speed) but include some padding based on - the desired resolution. - """ - # Base wavelength limits - if outwave is not None: - wlim = np.array([outwave.min(), outwave.max()]) - else: - wlim = np.squeeze(np.array([wlo, whi])) - # Pad by nsigma * sigma_wave - if linear: - wlim += nsigma_pad * width * np.array([-1, 1]) - else: - wlim *= (1 + nsigma_pad / width * np.array([-1, 1])) - mask = (wavelength > wlim[0]) & (wavelength < wlim[1]) - return mask - - -def resample_wave(wavelength, spectrum, linear=False): - """Resample spectrum, so that the number of elements is the next highest - power of two. This uses np.interp. Note that if the input wavelength grid - did not critically sample the spectrum then there is no gaurantee the - output wavelength grid will. 
- """ - wmin, wmax = wavelength.min(), wavelength.max() - nw = len(wavelength) - nnew = int(2.0**(np.ceil(np.log2(nw)))) - if linear: - Rgrid = np.diff(wavelength) # in same units as ``wavelength`` - w = np.linspace(wmin, wmax, nnew) - else: - Rgrid = np.diff(np.log(wavelength)) # actually 1/R - lnlam = np.linspace(np.log(wmin), np.log(wmax), nnew) - w = np.exp(lnlam) - # Make sure the resolution really is nearly constant - #assert Rgrid.max() / Rgrid.min() < 1.05 - s = np.interp(w, wavelength, spectrum) - return w, s - - -def subtract_input_resolution(res_in, res_target, smoothtype_in, smoothtype_target, wave=None): - """Subtract the input resolution (in quadrature) from a target output - resolution to get the width of the kernel that will convolve the input to - the output. Assumes all convolutions are with gaussians. - """ - if smoothtype_in == "R": - width_in = 1.0 / res_in - else: - width_in = res_in - if smoothtype_target == "R": - width_target = 1.0 / res_target - else: - width_target = res_target - - if smoothtype_in == smoothtype_target: - dwidth_sq = width_target**2 - width_in**2 - - elif (smoothtype_in == "vel") & (smoothype_target == "lambda"): - dwidth_sq = width_target**2 - (wave * width_in / ckms)**2 - - elif (smoothtype_in == "R") & (smoothype_target == "lambda"): - dwidth_sq = width_target**2 - (wave * width_in)**2 - - elif (smoothtype_in == "lambda") & (smoothtype_target == "vel"): - dwidth_sq = width_target**2 - (ckms * width_in / wave)**2 - - elif (smoothtype_in == "lambda") & (smoothtype_target == "R"): - dwidth_sq = width_target**2 - (width_in / wave)**2 - - elif (smoothtype_in == "R") & (smoothtype_target == "vel"): - print("srsly?") - return None - elif (smoothtype_in == "vel") & (smoothtype_target == "R"): - print("srsly?") - return None - - if np.any(dwidth_sq <= 0): - print("Warning: Desired resolution is better than input resolution") - dwidth_sq = np.clip(dwidth_sq, 0, np.inf) - - if smoothtype_target == "R": - return 1.0 / 
np.sqrt(dwidth_sq) - else: - return np.sqrt(dwidth_sq) - - return delta_width diff --git a/tests/tests_smoothing.py b/tests/tests_smoothing.py index 9ea92f13..59742339 100644 --- a/tests/tests_smoothing.py +++ b/tests/tests_smoothing.py @@ -6,7 +6,7 @@ # TODO: have some tests that do not require a python-fsps install import numpy as np import matplotlib.pyplot as pl -from prospect.utils.smoothing import smooth_fft, smooth_wave_fft, smooth_lsf_fft, smoothspec +from sedpy.smoothing import smooth_fft, smooth_wave_fft, smooth_lsf_fft, smoothspec def lsf(wave, wave0=5000, a=5e-5, b=1e-7, c=1.0, **extras): From fc68b5e4534196a0df3b134bdce17537194ab493 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Wed, 4 May 2022 09:47:09 -0400 Subject: [PATCH 08/33] Updating demos and docs for new Observation class. --- demo/demo_mock_params.py | 142 +++----- demo/demo_params.py | 111 ++---- demo/tutorial.rst | 46 +-- doc/usage.rst | 42 ++- misc/diagnostics.py | 418 ---------------------- misc/fdot.py | 30 -- {misc => tests/misc}/test_compsp.py | 0 {misc => tests/misc}/test_sft.py | 0 {misc => tests/misc}/test_stepsfh.py | 0 {misc => tests/misc}/timing_smoothspec.py | 0 {misc => tests/misc}/timings_pyfsps.py | 0 {misc => tests/misc}/ztest.py | 0 12 files changed, 139 insertions(+), 650 deletions(-) delete mode 100644 misc/diagnostics.py delete mode 100644 misc/fdot.py rename {misc => tests/misc}/test_compsp.py (100%) rename {misc => tests/misc}/test_sft.py (100%) rename {misc => tests/misc}/test_stepsfh.py (100%) rename {misc => tests/misc}/timing_smoothspec.py (100%) rename {misc => tests/misc}/timings_pyfsps.py (100%) rename {misc => tests/misc}/ztest.py (100%) diff --git a/demo/demo_mock_params.py b/demo/demo_mock_params.py index b1d927c0..1647ff3a 100644 --- a/demo/demo_mock_params.py +++ b/demo/demo_mock_params.py @@ -18,56 +18,9 @@ spitzer = ['spitzer_irac_ch'+n for n in '1234'] -# -------------- -# RUN_PARAMS -# When running as a script with argparsing, these are ignored. 
Kept here for backwards compatibility. -# -------------- - -run_params = {'verbose': True, - 'debug': False, - 'outfile': 'output/demo_mock', - 'output_pickles': False, - # Optimization parameters - 'do_powell': False, - 'ftol': 0.5e-5, 'maxfev': 5000, - 'do_levenberg': True, - 'nmin': 10, - # emcee Fitter parameters - 'nwalkers': 64, - 'nburn': [32, 32, 64], - 'niter': 256, - 'interval': 0.25, - 'initial_disp': 0.1, - # dynesty Fitter parameters - 'nested_bound': 'multi', # bounding method - 'nested_sample': 'unif', # sampling method - 'nested_nlive_init': 100, - 'nested_nlive_batch': 100, - 'nested_bootstrap': 0, - 'nested_dlogz_init': 0.05, - 'nested_weight_kwargs': {"pfrac": 1.0}, - 'nested_target_n_effective': 10000, - # Mock data parameters - 'snr': 20.0, - 'add_noise': False, - 'filterset': galex + sdss + twomass, - # Input mock model parameters - 'mass': 1e10, - 'logzsol': -0.5, - 'tage': 12., - 'tau': 3., - 'dust2': 0.3, - 'zred': 0.1, - 'add_neb': False, - # SPS parameters - 'zcontinuous': 1, - } - - -# -------------- +# ---------------- # Model Definition -# -------------- - +# ---------------- def build_model(zred=0.0, add_neb=True, **extras): """Instantiate and return a ProspectorParams model subclass. @@ -116,12 +69,12 @@ def build_model(zred=0.0, add_neb=True, **extras): # --- Set initial values --- model_params["zred"]["init"] = zred - return sedmodel.SedModel(model_params) + return sedmodel.SpecModel(model_params) + # ------------------ # Observational Data # ------------------ - def build_obs(snr=10.0, filterset=["sdss_g0", "sdss_r0"], add_noise=True, **kwargs): """Make a mock dataset. 
Feel free to add more complicated kwargs, and put @@ -139,57 +92,53 @@ def build_obs(snr=10.0, filterset=["sdss_g0", "sdss_r0"], :param add_noise: (optional, boolean, default: True) If True, add a realization of the noise to the mock spectrum """ - from prospect.utils.obsutils import fix_obs + from prospect.data.observation import Photometry, Spectrum # We'll put the mock data in this dictionary, just as we would for real # data. But we need to know which bands (and wavelengths if doing # spectroscopy) in which to generate mock data. - mock = {} - mock['wavelength'] = None # No spectrum - mock['spectrum'] = None # No spectrum - mock['filters'] = load_filters(filterset) + smock = Spectrum() # no spectrum + pmock = Photometry(filters=filterset) # We need the models to make a mock sps = build_sps(**kwargs) - mod = build_model(**kwargs) + mock_model = build_model(**kwargs) # Now we get the mock params from the kwargs dict params = {} - for p in mod.params.keys(): + for p in mock_model.params.keys(): if p in kwargs: params[p] = np.atleast_1d(kwargs[p]) - # And build the mock - mod.params.update(params) - spec, phot, _ = mod.mean_model(mod.theta, mock, sps=sps) + # And build the mock spectrum and photometry + mock_model.params.update(params) + mock_theta = mock_model.theta + (spec, phot), _ = mock_model.predict(mock_theta, [smock, pmock], sps=sps) # Now store some ancillary, helpful info; # this information is not required to run a fit. 
- mock['true_spectrum'] = spec.copy() - mock['true_maggies'] = phot.copy() - mock['mock_params'] = deepcopy(mod.params) - mock['mock_snr'] = snr - mock["phot_wave"] = np.array([f.wave_effective for f in mock["filters"]]) + mock_info = dict(true_spectrum=spec.copy(), true_phot=phot.copy(), + mock_params=deepcopy(mock_model.params), mock_theta=mock_theta.copy(), + mock_snr=snr, mock_filters=filterset) # And store the photometry, adding noise if desired + pmock.flux = phot.copy() pnoise_sigma = phot / snr + pmock.uncertainty = pnoise_sigma if add_noise: pnoise = np.random.normal(0, 1, len(phot)) * pnoise_sigma - mock['maggies'] = phot + pnoise - else: - mock['maggies'] = phot.copy() - mock['maggies_unc'] = pnoise_sigma - mock['phot_mask'] = np.ones(len(phot), dtype=bool) + pmock.flux += pnoise + mock_info["noise_realization"] = pnoise - # This ensures all required keys are present - mock = fix_obs(mock) + # This ensures all required keys are present for fitting + pmock.rectify() + + return [smock, pmock], mock_info - return mock # -------------- # SPS Object # -------------- - def build_sps(zcontinuous=1, **extras): """Instantiate and return the Stellar Population Synthesis object. @@ -207,21 +156,28 @@ def build_sps(zcontinuous=1, **extras): compute_vega_mags=False) return sps + # ----------------- -# Noise Model +# Noise Modeling? 
# ------------------ +def build_noise(observations, **extras): + # use the defaults + return observations -def build_noise(**extras): - return None, None # ----------- # Everything # ------------ +def build_all(config): + + observations, mock_info = build_obs(**config) + observations = build_noise(observations, **config) + model = build_model(**config) + sps = build_sps(**config) -def build_all(**kwargs): + config["mock_info"] = mock_info - return (build_obs(**kwargs), build_model(**kwargs), - build_sps(**kwargs), build_noise(**kwargs)) + return (observations, model, sps) if __name__ == '__main__': @@ -251,27 +207,33 @@ def build_all(**kwargs): parser.add_argument('--mass', type=float, default=1e10, help="Stellar mass of the mock; solar masses formed") + # --- Configure --- args = parser.parse_args() - run_params = vars(args) - obs, model, sps, noise = build_all(**run_params) - - run_params["sps_libraries"] = sps.ssp.libraries - run_params["param_file"] = __file__ + config = vars(args) + config["param_file"] = __file__ + # --- Get fitting ingredients --- + obs, model, sps = build_all(config) + config["sps_libraries"] = sps.ssp.libraries print(model) if args.debug: sys.exit() - #hfile = setup_h5(model=model, obs=obs, **run_params) - hfile = "{0}_{1}_mcmc.h5".format(args.outfile, int(time.time())) - output = fit_model(obs, model, sps, noise, **run_params) + # --- Set up output --- + ts = time.strftime("%y%b%d-%H.%M", time.localtime()) + hfile = f"{args.outfile}_{ts}_result.h5" + + # --- Run the actual fit --- + output = fit_model(obs, model, sps, **config) + print("writing to {}".format(hfile)) writer.write_hdf5(hfile, run_params, model, obs, output["sampling"][0], output["optimization"][0], tsample=output["sampling"][1], toptimize=output["optimization"][1], - sps=sps) + sps=sps + ) try: hfile.close() diff --git a/demo/demo_params.py b/demo/demo_params.py index 09102465..b4d8a769 100644 --- a/demo/demo_params.py +++ b/demo/demo_params.py @@ -8,46 +8,6 @@ from 
prospect.io import write_results as writer -# -------------- -# RUN_PARAMS -# When running as a script with argparsing, these are ignored. Kept here for backwards compatibility. -# -------------- - -run_params = {'verbose': True, - 'debug': False, - 'outfile': 'demo_galphot', - 'output_pickles': False, - # Optimization parameters - 'do_powell': False, - 'ftol': 0.5e-5, 'maxfev': 5000, - 'do_levenberg': True, - 'nmin': 10, - # emcee fitting parameters - 'nwalkers': 128, - 'nburn': [16, 32, 64], - 'niter': 512, - 'interval': 0.25, - 'initial_disp': 0.1, - # dynesty Fitter parameters - 'nested_bound': 'multi', # bounding method - 'nested_sample': 'unif', # sampling method - 'nested_nlive_init': 100, - 'nested_nlive_batch': 100, - 'nested_bootstrap': 0, - 'nested_dlogz_init': 0.05, - 'nested_weight_kwargs': {"pfrac": 1.0}, - 'nested_target_n_effective': 10000, - # Obs data parameters - 'objid': 0, - 'phottable': 'demo_photometry.dat', - 'luminosity_distance': 1e-5, # in Mpc - # Model parameters - 'add_neb': False, - 'add_duste': False, - # SPS parameters - 'zcontinuous': 1, - } - # -------------- # Model Definition # -------------- @@ -91,7 +51,7 @@ def build_model(object_redshift=0.0, fixed_metallicity=None, add_duste=False, # controlled by the "zred" parameter and a WMAP9 cosmology. 
if luminosity_distance > 0: model_params["lumdist"] = {"N": 1, "isfree": False, - "init": luminosity_distance, "units":"Mpc"} + "init": luminosity_distance, "units": "Mpc"} # Adjust model initial values (only important for optimization or emcee) model_params["dust2"]["init"] = 0.1 @@ -135,7 +95,7 @@ def build_model(object_redshift=0.0, fixed_metallicity=None, add_duste=False, model_params.update(TemplateLibrary["nebular"]) # Now instantiate the model using this new dictionary of parameter specifications - model = sedmodel.SedModel(model_params) + model = sedmodel.SpecModel(model_params) return model @@ -188,7 +148,7 @@ def build_obs(objid=0, phottable='demo_photometry.dat', # import astropy.io.fits as pyfits # catalog = pyfits.getdata(phottable) - from prospect.utils.obsutils import fix_obs + from prospect.data.observation import Photometry, Spectrum # Here we will read in an ascii catalog of magnitudes as a numpy structured # array @@ -214,28 +174,22 @@ def build_obs(objid=0, phottable='demo_photometry.dat', # Build output dictionary. obs = {} - # This is a list of sedpy filter objects. See the - # sedpy.observate.load_filters command for more details on its syntax. - obs['filters'] = load_filters(filternames) # This is a list of maggies, converted from mags. It should have the same - # order as `filters` above. - obs['maggies'] = np.squeeze(10**(-mags/2.5)) - # HACK. You should use real flux uncertainties - obs['maggies_unc'] = obs['maggies'] * 0.07 - # Here we mask out any NaNs or infs - obs['phot_mask'] = np.isfinite(np.squeeze(mags)) - # We have no spectrum. - obs['wavelength'] = None - obs['spectrum'] = None + # order as `filternames` above. + maggies = np.squeeze(10**(-mags/2.5)) + pdat = Photometry(filters=filternames, flux=maggies, + uncertainty=maggies * 0.07, mask=np.isfinite(maggies)) + # We have no spectral data, but we still want to see the predicted spectrum + sdat = Spectrum() # Add unessential bonus info. 
This will be stored in output - #obs['dmod'] = catalog[ind]['dmod'] - obs['objid'] = objid + pdat.distance_modulus = dm + pdat.objid = objid - # This ensures all required keys are present and adds some extra useful info - obs = fix_obs(obs) + # This ensures all required keys are present + pdat.rectify() - return obs + return [sdat, pdat] # -------------- # SPS Object @@ -251,24 +205,28 @@ def build_sps(zcontinuous=1, compute_vega_mags=False, **extras): # Noise Model # ------------------ -def build_noise(**extras): - return None, None +def build_noise(observations, **extras): + # use the defaults + return observations # ----------- # Everything # ------------ def build_all(**kwargs): + observations = build_obs(**kwargs) + observations = build_noise(observations, **kwargs) + model = build_model(**kwargs) + sps = build_sps(**kwargs) - return (build_obs(**kwargs), build_model(**kwargs), - build_sps(**kwargs), build_noise(**kwargs)) + return (observations, model, sps) if __name__ == '__main__': - # - Parser with default arguments - + # --- Parser with default arguments --- parser = prospect_args.get_parser() - # - Add custom arguments - + # --- Add custom arguments --- parser.add_argument('--object_redshift', type=float, default=0.0, help=("Redshift for the model")) parser.add_argument('--add_neb', action="store_true", @@ -283,30 +241,33 @@ def build_all(**kwargs): parser.add_argument('--objid', type=int, default=0, help="zero-index row number in the table to fit.") + # --- Configure --- args = parser.parse_args() - run_params = vars(args) - obs, model, sps, noise = build_all(**run_params) - - run_params["sps_libraries"] = sps.ssp.libraries - run_params["param_file"] = __file__ + config = vars(args) + config["param_file"] = __file__ + # --- Get fitting ingredients --- + obs, model, sps = build_all(**config) + config["sps_libraries"] = sps.ssp.libraries print(model) if args.debug: sys.exit() - #hfile = setup_h5(model=model, obs=obs, **run_params) + # --- Set up output 
--- ts = time.strftime("%y%b%d-%H.%M", time.localtime()) - hfile = "{0}_{1}_result.h5".format(args.outfile, ts) + hfile = f"{args.outfile}_{ts}_result.h5" - output = fit_model(obs, model, sps, noise, **run_params) + # --- Run the actual fit --- + output = fit_model(obs, model, sps, **config) print("writing to {}".format(hfile)) writer.write_hdf5(hfile, run_params, model, obs, output["sampling"][0], output["optimization"][0], tsample=output["sampling"][1], toptimize=output["optimization"][1], - sps=sps) + sps=sps + ) try: hfile.close() diff --git a/demo/tutorial.rst b/demo/tutorial.rst index 7d724805..dff8a13b 100644 --- a/demo/tutorial.rst +++ b/demo/tutorial.rst @@ -40,19 +40,20 @@ The executable portion of the parameter file that comes after the ``if __name__ == "__main__"`` line is run when the parameter file is called. Here the possible command line arguments and their default values are defined, including any custom arguments that you might add. In this example we have added several -command line arguments that control how the data is read and how the The -supplied command line arguments are then parsed and placed in a dictionary. This -dictionary is passed to all the ingredient building methods (described below), -which return the data dictionary and necessary model objects. The data -dictionary and model objects are passed to a function that runs the prospector -fit (:py:func:`prospect.fitting.fit_model`). Finally, the fit results are -written to an output file. +command line arguments that control how the data is read and how the model is +built. The supplied command line arguments are then parsed and placed in a +**configuration** dictionary. This dictionary is passed to all the ingredient +building methods (described below), which return the required +:py:class:`Observation` objects and necessary model objects. The data and model +objects are passed to a function that runs the prospector fit +(:py:func:`prospect.fitting.fit_model`). 
Finally, the fit results are written to +an output file. **Building the fit ingredients: build_model** Several methods must be defined in the parameter file to build the ingredients -for the fit. The purpose of these functions and their required output are +for the fit. The purpose of these methods and their required output are described here. You will want to modify some of these for your specific model and data. Note that each of these functions will be passed a dictionary of command line arguments. These command line arguments, including any you add to @@ -65,7 +66,7 @@ First, the :py:func:`build_model` function is where the model that we will fit will be constructed. The specific model that you choose to construct depends on your data and your scientific question. -We have to specify a dictionary or list of model parameter specifications (see +We have to specify a dictionary of model parameter specifications (see :doc:`models`). Each specification is a dictionary that describes a single parameter. We can build the model by adjusting predefined sets of model parameter specifications, stored in the @@ -103,17 +104,18 @@ specifications. Since ``model_params`` is a dictionary (of dictionaries), you can update it with other parameter set dictionaries from the :py:class:`TemplateLibrary`. -Finally, the :py:func:`build_model` function takes the ``model_params`` dictionary or list that you build and -uses it to instantiate a :py:class:`SedModel` object. +Finally, the :py:func:`build_model` function takes the ``model_params`` +dictionary that you build and uses it to instantiate a :py:class:`SpecModel` +object. .. 
code-block:: python - from prospect.models import SedModel + from prospect.models import SpecModel model_params = TemplateLibrary["parametric_sfh"] # Turn on nebular emission and add associated parameters model_params.update(TemplateLibrary["nebular"]) model_params["gas_logu"]["isfree"] = True - model = SedModel(model_params) + model = SpecModel(model_params) print(model) @@ -128,25 +130,25 @@ nebular and/or dust emission parameters are added to the model. **Building the fit ingredients: build_obs** -The next thing to look at is the :py:func:`build_obs` function. -This is where you take the data from whatever format you have and -put it into the dictionary format required by |Codename| for a single object. -This means you will have to modify this function heavily for your own use. -But it also means you can use your existing data formats. +The next thing to look at is the :py:func:`build_obs` function. This is where +you take the data from whatever format you have and put it into the format +required by |Codename| for a single object. This means you will have to modify +this function heavily for your own use. But it also means you can use your +existing data formats. Right now, the :py:func:`build_obs` function just reads ascii data from a file, picks out a row (corresponding to the photometry of a single galaxy), and then -makes a dictionary using data in that row. You'll note that both the datafile +makes a set of :py:class:`Observation` objects using data in that row. You'll note that both the datafile name and the object number are keyword arguments to this function. That means they can be set at execution time on the command line, by also including those -variables in the ``run_params`` dictionary. We'll see an example later. +variables in the configuration dictionary. We'll see an example later. 
When you write your own :py:func:`build_obs` function, you can add all sorts of keyword arguments that control its output (for example, an object name or ID number that can be used to choose or find a single object in your data file). You can also import helper functions and modules. These can be either things like astropy, h5py, and sqlite or your own project specific modules and -functions. As long as the output dictionary is in the right format (see +functions. As long as the output data is in the right format (see dataformat.rst), the body of this function can do anything. **Building the fit ingredients: the rest** @@ -337,7 +339,7 @@ chain. # Get the modeled spectra and photometry. # These have the same shape as the obs['spectrum'] and obs['maggies'] arrays. - spec, phot, mfrac = model.predict(theta, obs=res['obs'], sps=sps) + (spec, phot), mfrac = model.predict(theta, obs=res['obs'], sps=sps) # mfrac is the ratio of the surviving stellar mass to the formed mass (the ``"mass"`` parameter). # Plot the model SED diff --git a/doc/usage.rst b/doc/usage.rst index feb9b7c9..950f0931 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -42,31 +42,43 @@ writes output. from prospect.io import write_results as writer from prospect import prospect_args - # Get the default argument parser + # --- Get the default argument parser --- parser = prospect_args.get_parser() # Add custom arguments that controll the build methods parser.add_argument("--custom_argument_1", ...) 
- # Parse the supplied arguments, convert to a dictionary, and add this file for logging purposes + + # --- Configure --- args = parser.parse_args() - run_params = vars(args) - run_params["param_file"] = __file__ + config = vars(args) + config["param_file"] = __file__ - # build the fit ingredients - obs, model, sps, noise = build_all(**run_params) - run_params["sps_libraries"] = sps.ssp.libraries + # --- Get fitting ingredients --- + obs, model, sps = build_all(**config) + config["sps_libraries"] = sps.ssp.libraries + print(model) + + if args.debug: + sys.exit() - # Set up an output file name and run the fit + # --- Set up output --- ts = time.strftime("%y%b%d-%H.%M", time.localtime()) - hfile = "{0}_{1}_mcmc.h5".format(args.outfile, ts) - output = fit_model(obs, model, sps, noise, **run_params) + hfile = f"{args.outfile}_{ts}_result.h5" - # Write results to output file + # --- Run the actual fit --- + output = fit_model(obs, model, sps, **config) + + print("writing to {}".format(hfile)) writer.write_hdf5(hfile, run_params, model, obs, - output["sampling"][0], output["optimization"][0], - tsample=output["sampling"][1], - toptimize=output["optimization"][1], - sps=sps) + output["sampling"][0], output["optimization"][0], + tsample=output["sampling"][1], + toptimize=output["optimization"][1], + sps=sps + ) + try: + hfile.close() + except(AttributeError): + pass Command Line Options and Custom Arguments diff --git a/misc/diagnostics.py b/misc/diagnostics.py deleted file mode 100644 index 2e1c70d6..00000000 --- a/misc/diagnostics.py +++ /dev/null @@ -1,418 +0,0 @@ -#Take the results from MCMC fitting of clusters -# and make diagnostic plots, or derive predictions for -# observables, etc.. 
- -import numpy as np -import matplotlib.pyplot as pl -import triangle -import pickle -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - - -def diagnostic_plots(sample_file, sps, model_file=None, - powell_file=None, inmod=None, - showpars=None, - nspec=5, thin=10, start=0, outname=None): - """ - Plots a number of diagnostics. These include: - spectrum - - the observed spectrum, the spectra produced from a given number of samples of the - posterior parameter space, the spectrum produced from marginalized means of each - parameter, the spectrum at the initial position from Powell minimization, and the - applied calibration model. - spectrum_blue - - same as above but for the blue region of the spectrum - sed - - as for spectrum, but f_nu at the effective wavelength of the - filters is shown instead. - stars - - just the stellar dust model for samples of the posterior. - spectrum_residuals - - plots of spectrum residuals for a given number of samples of the posterior - sed_residuals - - broadband photometry residuals, in units of f_nu - x_vs_step - - the evolution of the walkers in each parameter as a function of iteration - lnp_vs_step - - the evolution of the walkers in likelihood - triangle - - a corner plot of parameter covariances - """ - - #read results and set up model - if outname is None: - outname = sample_file#''.join(sample_file.split('.')[:-1]) - sample_results, pr, model = read_pickles(sample_file, model_file=model_file, - powell_file=powell_file, inmod=inmod) - for k, v in model.params.iteritems(): - try: - sps.params[k] = v - except KeyError: - pass - - ## Plot spectra and SEDs - ## - #rindex = model_obs(sample_results, sps, photflag=0, outname=outname, nsample=nspec, - # wlo=3400, whi =10e3, start=start) - #_ = model_obs(sample_results, sps, photflag=0, outname=outname, rindex=rindex, - # wlo=3600, whi=4450, extraname='_blue', start=start) - #_ = model_obs(sample_results, sps, photflag=1, outname=outname, 
nsample=15, - # wlo=2500, whi=8.5e3, start=start) - - #stellar_pop(sample_results, sps, outname=outname, nsample=nspec, - # wlo=3500, whi=9.5e3, start=start, - # alpha = 0.5, color = 'green') - - ## Plot spectral and SED residuals - ## - #residuals(sample_results, sps, photflag=0, outname=outname, nsample=nspec, - # linewidth=0.5, alpha=0.3, color='blue', marker=None, start=start, rindex=rindex) - #residuals(sample_results, sps, photflag=1, outname = outname, nsample = 15, - # linewidth=0.5, alpha=0.3, color='blue', marker='o', start=start, rindex=rindex) - - ## Plot parameters versus step - ## - param_evol(sample_results, outname=outname, showpars=showpars) - - ## Plot lnprob vs step (with a zoom-in) - ## - pl.figure() - pl.clf() - nwalk = sample_results['lnprobability'].shape[0] - for j in range(nwalk): - pl.plot(sample_results['lnprobability'][j,:]) - pl.ylabel('lnP') - pl.xlabel('step #') - pl.savefig('{0}.lnP_vs_step.png'.format(outname)) - pl.close() - #yl = sample_results['lnprobability'].max() + np.array([-3.0 * sample_results['lnprobability'][:,-1].std(), 10]) - #pl.ylim(yl[0], yl[1]) - #pl.savefig('{0}.lnP_vs_step_zoom.png'.format(outname)) - #pl.close() - - ## Triangle plot - ## - subtriangle(sample_results, outname=outname, - showpars=showpars, - start=start, thin=thin) - - return outname, sample_results, model, pr - - -def model_comp(theta, model, sps, photflag=0, inlog=True): - """ - Generate and return various components of the total model for a - given set of parameters - """ - obs, _, _ = obsdict(model.obs, photflag=photflag) - mask = obs['mask'] - mu = model.mean_model(theta, sps=sps)[photflag][mask] - spec = obs['spectrum'][mask] - wave = obs['wavelength'][mask] - - if photflag == 0: - cal = model.calibration()[mask] - try: - #model.gp.sigma = obs['unc'][mask]/mu - s = model.params['gp_jitter'] - a = model.params['gp_amplitude'] - l = model.params['gp_length'] - model.gp.factor(s, a, l, check_finite = False, force=True) - if inlog: - mu = 
np.log(mu) - delta = model.gp.predict(spec - mu - cal) - else: - delta = model.gp.predict(spec - mu*cal) - except: - delta = 0 - else: - mask = np.ones(len(obs['wavelength']), dtype= bool) - cal = np.ones(len(obs['wavelength'])) - delta = np.zeros(len(obs['wavelength'])) - - return mu, cal, delta, mask, wave - - -def model_obs(sample_results, sps, photflag=0, outname=None, - start=0, rindex =None, nsample=10, - wlo=3500, whi=9e3, extraname=''): - - """ - Plot the observed spectrum and overlay samples of the model - posterior, including different components of that model. - """ - - title = ['Spectrum', 'SED (photometry)'] - start = np.min([start, sample_results['chain'].shape[1]]) - flatchain = sample_results['chain'][:,start:,:] - flatchain = flatchain.reshape(flatchain.shape[0] * flatchain.shape[1], - flatchain.shape[2]) - - # draw samples - if rindex is None: - rindex = np.random.uniform(0, flatchain.shape[0], nsample).astype( int ) - else: - nsample = len(rindex) - # set up the observation dictionary for spectrum or SED - obs, outn, marker = obsdict(sample_results, photflag) - - # set up plot window and plot data - pl.figure() - pl.axhline( 0, linestyle = ':', color ='black') - pl.plot(obs['wavelength'], obs['spectrum'], - marker=marker, linewidth=0.5, - color='blue', label='observed') - - # plot the minimization result - theta = sample_results['initial_center'] - ypred, res, cal, mask, spop = model_components(theta, sample_results, obs, sps, photflag=photflag) - pl.plot(obs['wavelength'][mask], ypred + res, - marker=marker, alpha=0.5, linewidth=0.3, - color='cyan', label='minimization result') - - # loop over drawn samples and plot the model components - label = ['full model', 'calib.', 'GP'] - for i in range(nsample): - theta = flatchain[rindex[i],:] - ypred, res, cal, mask, spop = model_components(theta, sample_results, obs, sps, photflag=photflag) - - pl.plot(obs['wavelength'][mask], np.zeros(mask.sum()) + res, - linewidth=0.5, alpha=0.5, color='red', 
label=label[2]) - pl.plot(obs['wavelength'], cal * sample_results['model'].params.get('linescale', 1.0), - linewidth=0.5, color='magenta', label=label[1]) - pl.plot(obs['wavelength'][mask], ypred + res, - marker=marker, alpha=0.5 , color='green', label=label[0]) - label = 3 * [None] - - pl.legend(loc=0, fontsize='small') - pl.xlim(wlo, whi) - pl.xlabel(r'$\AA$') - pl.ylabel('Rate') - pl.title(title[photflag]) - if outname is not None: - pl.savefig('{0}.{1}{2}.png'.format(outname, outn, extraname), dpi=300) - pl.close() - return rindex - -def stellar_pop(sample_results, sps, outname=None, normalize_by=None, - start=0, rindex=None, nsample=10, - wlo=3500, whi=9e3, extraname='', **kwargs): - """ - Plot samples of the posterior for just the stellar population and - dust model. - """ - start = np.min([start, sample_results['chain'].shape[1]]) - flatchain = sample_results['chain'][:,start:,:] - flatchain = flatchain.reshape(flatchain.shape[0] * flatchain.shape[1], - flatchain.shape[2]) - # draw samples - if rindex is None: - rindex = np.random.uniform(0, flatchain.shape[0], nsample).astype( int ) - # set up the observation dictionary for spectrum or SED - obs, outn, marker = obsdict(sample_results, 0) - - # set up plot window - pl.figure() - pl.axhline( 0, linestyle=':', color='black') - - # loop over drawn samples and plot the model components - label = ['Stars & Dust'] - xl = '' - for i in range(nsample): - theta = flatchain[rindex[i],:] - ypred, res, cal, mask, spop = model_components(theta, sample_results, obs, sps, photflag=0) - if normalize_by is not None: - spop /= spop[normalize_by] - xl = '/C' - pl.plot(obs['wavelength'], spop, - label = label[0], **kwargs) - label = 3 * [None] - - pl.legend(loc = 0, fontsize = 'small') - pl.xlim(wlo, whi) - pl.xlabel(r'$\AA$') - pl.ylabel(r'L$_\lambda {0}$ (L$_\odot/\AA$)'.format(xl)) - if outname is not None: - pl.savefig('{0}.{1}{2}.png'.format(outname, 'stars', extraname), dpi=300) - pl.close() - - -def 
residuals(sample_results, sps, photflag=0, outname=None, - nsample=5, rindex=None, start=0, - wlo=3600, whi=7500, **kwargs): - """ - Plot residuals of the observations from samples of the model - posterior. This is done in terms of relative, uncertainty - normalized, and absolute residuals. Extra keywords are passed to - plot(). - """ - - start = np.min([start, sample_results['chain'].shape[1]]) - flatchain = sample_results['chain'][:,start:,:] - flatchain = flatchain.reshape(flatchain.shape[0] * flatchain.shape[1], - flatchain.shape[2]) - # draw samples - if rindex is None: - rindex = np.random.uniform(0, flatchain.shape[0], nsample).astype( int ) - nsample = len(rindex) - - # set up the observation dictionary for spectrum or SED - obs, outn, marker = obsdict(sample_results, photflag) - - # set up plot window - fig, axes = pl.subplots(3,1) - # draw guidelines - [a.axhline( int(i==0), linestyle=':', color='black') for i,a in enumerate(axes)] - axes[0].set_ylabel('obs/model') - axes[0].set_ylim(0.5,1.5) - axes[0].set_xticklabels([]) - axes[1].set_ylabel(r'(obs-model)/$\sigma$') - axes[1].set_ylim(-10,10) - axes[1].set_xticklabels([]) - axes[2].set_ylabel(r'(obs-model)') - axes[2].set_xlabel(r'$\AA$') - - # loop over the drawn samples - for i in range(nsample): - theta = flatchain[rindex[i],:] - ypred, res, cal, mask, spop = model_components(theta, sample_results, obs, sps, photflag=photflag) - wave, ospec, mod = obs['wavelength'][mask], obs['spectrum'][mask], (ypred + res) - axes[0].plot(wave, ospec / mod, **kwargs) - axes[1].plot(wave, (ospec - mod) / obs['unc'][mask], **kwargs) - axes[2].plot(wave, (ospec - mod), **kwargs) - - if photflag == 0: - [a.set_xlim(wlo,whi) for a in axes] - - fig.subplots_adjust(hspace =0) - if outname is not None: - fig.savefig('{0}.{1}_residuals.png'.format(outname, outn), dpi=300) - pl.close() - -def obsdict(inobs, photflag): - """ - Return a dictionary of observational data, generated depending on - whether you're matching photometry 
or spectroscopy. - """ - obs = inobs.copy() - if photflag == 0: - outn = 'spectrum' - marker = None - elif photflag == 1: - outn = 'sed' - marker = 'o' - obs['wavelength'] = np.array([f.wave_effective for f in obs['filters']]) - obs['spectrum'] = 10**(0-0.4 * obs['mags']) - obs['unc'] = obs['mags_unc'] * obs['spectrum'] - obs['mask'] = obs['mags_unc'] > 0 - - return obs, outn, marker - -def param_evol(sample_results, outname=None, showpars=None, start=0): - """ - Plot the evolution of each parameter value with iteration #, for - each chain. - """ - - chain = sample_results['chain'][:,start:,:] - nwalk = chain.shape[0] - parnames = np.array(theta_labels(sample_results['model'].theta_desc)) - - #restrict to desired parameters - if showpars is not None: - ind_show = np.array([p in showpars for p in parnames], dtype= bool) - parnames = parnames[ind_show] - chain = chain[:,:,ind_show] - - #set up plot windows - ndim = len(parnames) - nx = int(np.floor(np.sqrt(ndim))) - ny = int(np.ceil(ndim*1.0/nx)) - sz = np.array([nx,ny]) - factor = 3.0 # size of one side of one panel - lbdim = 0.2 * factor # size of left/bottom margin - trdim = 0.2 * factor # size of top/right margin - whspace = 0.05*factor # w/hspace size - plotdim = factor * sz + factor *(sz-1)* whspace - dim = lbdim + plotdim + trdim - - fig, axes = pl.subplots(nx, ny, figsize = (dim[1], dim[0])) - lb = lbdim / dim - tr = (lbdim + plotdim) / dim - fig.subplots_adjust(left=lb[1], bottom=lb[0], right=tr[1], top=tr[0], - wspace=whspace, hspace=whspace) - - #sequentially plot the chains in each parameter - for i in range(ndim): - ax = axes.flatten()[i] - for j in range(nwalk): - ax.plot(chain[j,:,i]) - ax.set_title(parnames[i]) - if outname is not None: - fig.savefig('{0}.x_vs_step.png'.format(outname)) - pl.close() - -def theta_labels(desc): - """ - Using the theta_desc parameter dictionary, return a list of the model - parameter names that has the same aorder as the sampling chain array - """ - label, index = [], [] 
- for p in desc.keys(): - nt = desc[p]['N'] - name = p - if p is 'amplitudes': - name = 'A' - if nt is 1: - label.append(name) - index.append(desc[p]['i0']) - else: - for i in xrange(nt): - label.append(name+'{0}'.format(i+1)) - index.append(desc[p]['i0']+i) - - return [l for (i,l) in sorted(zip(index,label))] - -def sample_photometry(sample_results, sps, filterlist, - start=0, wthin=16, tthin=10): - - chain, model = sample_results['chain'], sample_results['model'] - for k, v in model.sps_fixed_params.iteritems(): - sps.params[k] = v - model.filters = filterlist - nwalkers, nt, ndim = chain.shape - wit = range(0,nwalkers,wthin) #walkers to use - tit = range(start, nt, thin) #time steps to use - phot = np.zeros( len(wit), len(tit), len(filterlist)) #build storage - for i in wit: - for j in tit: - s, p, m = model.model(chain[i,j,:], sps=sps) - phot[i,j,:] = p - #mass[i,j] = m - return phot, wit, tit - -## All this because scipy changed -# the name of one class, which shouldn't even be a class. - -renametable = { - 'Result': 'OptimizeResult', - } - -def mapname(name): - if name in renametable: - return renametable[name] - return name - -def mapped_load_global(self): - module = mapname(self.readline()[:-1]) - name = mapname(self.readline()[:-1]) - klass = self.find_class(module, name) - self.append(klass) - -def load(file): - unpickler = pickle.Unpickler(file) - unpickler.dispatch[pickle.GLOBAL] = mapped_load_global - return unpickler.load() diff --git a/misc/fdot.py b/misc/fdot.py deleted file mode 100644 index 672c796f..00000000 --- a/misc/fdot.py +++ /dev/null @@ -1,30 +0,0 @@ -# script to calculate the fractional change in SSP flux as a function -# of time. 
-import matplotlib.pyplot as pl -import numpy as np -import fsps -sps = fsps.StellarPopulation(zcontinuous=0) - -# compile all metallicities -for i, z in enumerate(sps.zlegend): - w, s = sps.get_spectrum(zmet=i+1) -spec, mass, lbol = sps.all_ssp_spec(peraa=True) - - -wmin, wmax, amin, amax, zmet = 1.5e3, 2e4, 0.01, 10, 4 - -ages = 10**(sps.ssp_ages-9) -waves = sps.wavelengths - -gwave = (waves < wmax) & (waves > wmin) -gage = (ages < amax) & (ages > amin) - -fdot = np.diff(spec, axis=1) -fbar = (spec[:,:-1,:] + spec[:,1:,:])/2.0 - -pl.imshow(np.squeeze((fdot/fbar)[np.ix_(gwave, gage, [zmet])]), - interpolation='nearest', aspect='auto') - - - - diff --git a/misc/test_compsp.py b/tests/misc/test_compsp.py similarity index 100% rename from misc/test_compsp.py rename to tests/misc/test_compsp.py diff --git a/misc/test_sft.py b/tests/misc/test_sft.py similarity index 100% rename from misc/test_sft.py rename to tests/misc/test_sft.py diff --git a/misc/test_stepsfh.py b/tests/misc/test_stepsfh.py similarity index 100% rename from misc/test_stepsfh.py rename to tests/misc/test_stepsfh.py diff --git a/misc/timing_smoothspec.py b/tests/misc/timing_smoothspec.py similarity index 100% rename from misc/timing_smoothspec.py rename to tests/misc/timing_smoothspec.py diff --git a/misc/timings_pyfsps.py b/tests/misc/timings_pyfsps.py similarity index 100% rename from misc/timings_pyfsps.py rename to tests/misc/timings_pyfsps.py diff --git a/misc/ztest.py b/tests/misc/ztest.py similarity index 100% rename from misc/ztest.py rename to tests/misc/ztest.py From ea31213f4f23993503f27479586258a77e744801 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sat, 18 Jun 2022 11:34:07 -0400 Subject: [PATCH 09/33] Observation class updates. Observation object methods to convert to structures, fits, h5, etc. Better docstrings. New Observation.Lines subclass. Fix import error in sedmodel. 
--- doc/quickstart.rst | 3 +- prospect/data/observation.py | 151 ++++++++++++++++++++++++++++++++--- prospect/models/__init__.py | 3 +- 3 files changed, 142 insertions(+), 15 deletions(-) diff --git a/doc/quickstart.rst b/doc/quickstart.rst index 2181b976..d544fd75 100644 --- a/doc/quickstart.rst +++ b/doc/quickstart.rst @@ -52,7 +52,8 @@ include an empty Spectrum data set to force a prediction of the full spectrum. magerr = np.array([cat[0][f"cModelMagErr_{b}"] for b in bands]) magerr = np.clip(magerr, 0.05, np.inf) - pdat = Photometry(filters=filters, flux=maggies, uncertainty=magerr*maggies/1.086) + pdat = Photometry(filters=filters, flux=maggies, uncertainty=magerr*maggies/1.086, + name=f'sdss_phot_specobjID{cat[0]["specObjID"]}') sdat = Spectrum(wavelength=None, flux=None, uncertainty=None) observations = [sdat, pdat] for obs in observations: diff --git a/prospect/data/observation.py b/prospect/data/observation.py index 9b9e053d..ac301e52 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -14,6 +14,7 @@ class NumpyEncoder(json.JSONEncoder): + def default(self, obj): if isinstance(obj, np.ndarray): return obj.tolist() @@ -36,6 +37,8 @@ class Observation: logify_spectrum = False alias = {} + meta = ["kind", "name"] + data = ["wavelength", "flux", "uncertainty", "mask"] def __init__(self, flux=None, @@ -46,8 +49,8 @@ def __init__(self, **kwargs ): - self.flux = flux - self.uncertainty = uncertainty + self.flux = np.array(flux) + self.uncertainty = np.array(uncertainty) self.mask = mask self.noise = noise self.name = name @@ -73,30 +76,38 @@ def from_oldstyle(self, **kwargs): if k in kwargs: setattr(self, v, kwargs[k]) - def rectify(self, for_fitting=False): + def rectify(self): """Make sure required attributes for fitting are present and have the appropriate sizes. Also auto-masks non-finite data or negative uncertainties. 
""" + if self.flux is None: + print(f"{self.__repr__} has no data") + return assert self.wavelength.ndim == 1, "`wavelength` is not 1-d array" assert self.ndata > 0, "no wavelength points supplied!" - assert self.flux is not None, " No data." assert self.uncertainty is not None, "No uncertainties." assert len(self.wavelength) == len(self.flux), "Flux array not same shape as wavelength." assert len(self.wavelength) == len(self.uncertainty), "Uncertainty array not same shape as wavelength." - # make mask array with automatic filters - marr = np.zeros(self.ndata, dtype=bool) - marr[self.mask] = True - self.mask = (marr & - (np.isfinite(self.flux)) & - (np.isfinite(self.uncertainty)) & - (self.uncertainty > 0)) + self._automask() assert self.ndof > 0, f"{self.__repr__()} has no valid data to fit: check the sign of the masks." assert hasattr(self, "noise") + def _automask(self): + # make mask array with automatic filters + marr = np.zeros(self.ndata, dtype=bool) + marr[self.mask] = True + if self.flux is not None: + self.mask = (marr & + (np.isfinite(self.flux)) & + (np.isfinite(self.uncertainty)) & + (self.uncertainty > 0)) + else: + self.mask = marr + def render(self, wavelength, spectrum): raise(NotImplementedError) @@ -113,11 +124,52 @@ def ndata(self): else: return len(self.wavelength) - def serialize(self): - obs = vars(self) + def to_json(self): + obs = {m: getattr(self, m) for m in self.meta + self.data} serial = json.dumps(obs, cls=NumpyEncoder) return serial + def to_struct(self, data_dtype=np.float32): + """Convert data to a structured array + """ + self._automask() + dtype = np.dtype([(c, data_dtype) for c in self.data]) + struct = np.zeros(self.ndata, dtype=dtype) + for c in self.data: + data = getattr(self, c) + try: + struct[c] = data + except(ValueError): + pass + return struct + + def to_fits(self, filename=""): + """ + """ + from astropy.io import fits + hdus = fits.HDUList([fits.PrimaryHDU(), + fits.BinTableHDU(self.to_struct())]) + meta = {m: 
getattr(self, m) for m in self.meta} + if "filternames" in meta: + meta["filters"] = ",".join(meta["filternames"]) + for k, v in meta.items(): + try: + for hdu in hdus: + hdu.header[k] = v + except(ValueError): + pass + if filename: + hdus.writeto(filename, overwrite=True) + hdus.close() + + def to_h5_dataset(self, handle): + dset = handle.create_dataset(self.name, data=self.to_struct()) + for m in self.meta: + try: + dset.attr[m] = getattr(self, m) + except: + pass + class Photometry(Observation): @@ -126,9 +178,25 @@ class Photometry(Observation): maggies_unc="uncertainty", filters="filters", phot_mask="mask") + meta = ["kind", "name", "filternames"] def __init__(self, filters=[], name="PhotA", **kwargs): + """On Observation object that holds photometric data + + Parameters + ---------- + filters : list of strings or list of `sedpy.observate.Filter` instances + The names or instances of Filters to use + flux : iterable of floats + The flux through the filters, in units of maggies + + uncertainty : iterable of floats + The uncertainty on the flux + + name : string, optional + The name for this set of data + """ if type(filters[0]) is str: self.filternames = filters else: @@ -160,6 +228,9 @@ class Spectrum(Observation): wavelength="wavelength", mask="mask") + data = ["wavelength", "flux", "uncertainty", "mask", + "resolution", "calibration"] + def __init__(self, wavelength=None, resolution=None, @@ -170,6 +241,15 @@ def __init__(self, """ Parameters ---------- + wavelength : iterable of floats + The wavelength of each flux measurement, in vacuum AA + + flux : iterable of floats + The flux at each wavelength, in units of maggies, same length as ``wavelength`` + + uncertainty : iterable of floats + The uncertainty on the flux + resolution : (optional, default: None) Instrumental resolution at each wavelength point in units of km/s dispersion (:math:`= c \, \sigma_\lambda / \lambda = c \, \FWHM_\lambda / 2.355 / \lambda = c / (2.355 \, R_\lambda)` @@ -226,6 +306,51 @@ 
def to_oldstyle(self): return obs +class Lines(Spectrum): + + kind = "spectrum" + alias = dict(spectrum="flux", + unc="uncertainty", + wavelength="wavelength", + mask="mask", + line_inds="line_ind") + + data = ["wavelength", "flux", "uncertainty", "mask", + "resolution", "calibration", "line_ind"] + + def __init__(self, + line_ind=None, + name="SpecA", + **kwargs): + + """ + Parameters + ---------- + line_ind : iterable of int + The index of the lines in the FSPS spectral line array. + + wavelength : iterable of floats + The wavelength of each flux measurement, in vacuum AA + + flux : iterable of floats + The flux at each wavelength, in units of maggies, same length as ``wavelength`` + + uncertainty : iterable of floats + The uncertainty on the flux + + resolution : (optional, default: None) + Instrumental resolution at each wavelength point in units of km/s + dispersion (:math:`= c \, \sigma_\lambda / \lambda = c \, \FWHM_\lambda / 2.355 / \lambda = c / (2.355 \, R_\lambda)` + where :math:`c=2.998e5 {\rm km}/{\rm s}` + + :param calibration: + not sure yet .... + """ + super(Lines, self).__init__(name=name, **kwargs) + assert (line_ind is not None), "You must identify the lines by their index in the FSPS emission line array" + self.line_ind = np.array(line_ind).as_type(int) + + def from_oldstyle(obs, **kwargs): """Convert from an oldstyle dictionary to a list of observations """ diff --git a/prospect/models/__init__.py b/prospect/models/__init__.py index 59d362c9..c263ee34 100644 --- a/prospect/models/__init__.py +++ b/prospect/models/__init__.py @@ -5,7 +5,8 @@ specifications. 
""" -from .sedmodel import ProspectorParams, SedModel, SpecModel + +from .sedmodel import ProspectorParams, SpecModel from .sedmodel import PolySpecModel, SplineSpecModel from .sedmodel import AGNSpecModel, LineSpecModel From 6e16e2db727eb950df9b0081f97ef956b928620b Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sat, 18 Jun 2022 18:26:28 -0400 Subject: [PATCH 10/33] Starting on io simplification. Includes option to try to pickle model params. --- .github/workflows/tests.yml | 2 +- prospect/io/read_results.py | 114 +---------- prospect/io/write_results.py | 379 ++++++++++------------------------- prospect/models/sedmodel.py | 2 +- tests/test_predict.py | 42 ++-- 5 files changed, 133 insertions(+), 406 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index beb1b179..c91d1434 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,6 +36,6 @@ jobs: env: SPS_HOME: ${{ github.workspace }}/fsps - name: Run tests - run: python -m pytest --durations=0 --maxfail=1 -vs tests/ + run: python -m pytest --durations=0 --maxfail=1 -W ignore::DeprecationWarning --ignore tests/misc/ -vs tests/ env: SPS_HOME: ${{ github.workspace }}/fsps diff --git a/prospect/io/read_results.py b/prospect/io/read_results.py index 119e4e1f..71508841 100644 --- a/prospect/io/read_results.py +++ b/prospect/io/read_results.py @@ -69,34 +69,17 @@ def results_from(filename, model_file=None, dangerous=True, **kwargs): """ # Read the basic chain, parameter, and run_params info - if filename.split('.')[-1] == 'h5': - res = read_hdf5(filename, **kwargs) - if "_mcmc.h5" in filename: - mf_default = filename.replace('_mcmc.h5', '_model') - else: - mf_default = "x" - else: - with open(filename, 'rb') as rf: - res = pickle.load(rf) - mf_default = filename.replace('_mcmc', '_model') + res = read_hdf5(filename, **kwargs) - # Now try to read the model object itself from a pickle - if model_file is None: - mname = mf_default - else: - mname = model_file 
+ # Now try to instantiate the model object from the paramfile param_file = (res['run_params'].get('param_file', ''), res.get("paramfile_text", '')) - model, powell_results = read_model(mname, param_file=param_file, - dangerous=dangerous, **kwargs) if dangerous: try: model = get_model(res) except: model = None res['model'] = model - if powell_results is not None: - res["powell_results"] = powell_results return res, res["obs"], model @@ -153,56 +136,6 @@ def emcee_restarter(restart_from="", niter=32, **kwargs): return obs, model, sps, noise, run_params -def read_model(model_file, param_file=('', ''), dangerous=False, **extras): - """Read the model pickle. This can be difficult if there are user defined - functions that have to be loaded dynamically. In that case, import the - string version of the paramfile and *then* try to unpickle the model - object. - - :param model_file: - String, name and path to the model pickle. - - :param dangerous: (default: False) - If True, try to import the given paramfile. - - :param param_file: - 2-element tuple. The first element is the name of the paramfile, which - will be used to set the name of the imported module. The second - element is the param_file contents as a string. The code in this - string will be imported. - """ - model = powell_results = None - if os.path.exists(model_file): - try: - with open(model_file, 'rb') as mf: - mod = pickle.load(mf) - except(AttributeError): - # Here one can deal with module and class names that changed - with open(model_file, 'rb') as mf: - mod = load(mf) - except(ImportError, KeyError): - # here we load the parameter file as a module using the stored - # source string. Obviously this is dangerous as it will execute - # whatever is in the stored source string. 
But it can be used to - # recover functions (especially dependcy functions) that are user - # defined - path, filename = os.path.split(param_file[0]) - modname = filename.replace('.py', '') - if dangerous: - user_module = import_module_from_string(param_file[1], modname) - with open(model_file, 'rb') as mf: - mod = pickle.load(mf) - - model = mod['model'] - - for k, v in list(model.theta_index.items()): - if type(v) is tuple: - model.theta_index[k] = slice(*v) - powell_results = mod['powell'] - - return model, powell_results - - def read_hdf5(filename, **extras): """Read an HDF5 file (with a specific format) into a dictionary of results. @@ -260,21 +193,10 @@ def read_hdf5(filename, **extras): res['rstate'] = unpick(res['rstate']) except: pass - #try: - # mp = [names_to_functions(p.copy()) for p in res['model_params']] - # res['model_params'] = mp - #except: - # pass return res -def read_pickles(filename, **kwargs): - """Alias for backwards compatability. Calls `results_from()`. - """ - return results_from(filename, **kwargs) - - def get_sps(res): """This gets exactly the SPS object used in the fiting (modulo any changes to FSPS itself). @@ -461,12 +383,6 @@ def traceplot(results, showpars=None, start=0, chains=slice(None), return fig -def param_evol(results, **kwargs): - """Backwards compatability - """ - return traceplot(results, **kwargs) - - def subcorner(results, showpars=None, truths=None, start=0, thin=1, chains=slice(None), logify=["mass", "tau"], **kwargs): @@ -553,12 +469,6 @@ def subcorner(results, showpars=None, truths=None, return fig -def subtriangle(results, **kwargs): - """Backwards compatability - """ - return subcorner(results, **kwargs) - - def compare_paramfile(res, filename): """Compare the runtime parameter file text stored in the `res` dictionary to the text of some existing file with fully qualified path `filename`. 
@@ -573,23 +483,3 @@ def compare_paramfile(res, filename): bbl = json.loads(b) bb = bbl.split('\n') pprint([l for l in unified_diff(aa, bb)]) - - -def names_to_functions(p): - """Replace names of functions (or pickles of objects) in a parameter - description with the actual functions (or pickles). - """ - from importlib import import_module - for k, v in list(p.items()): - try: - m = import_module(v[1]) - f = m.__dict__[v[0]] - except: - try: - f = pickle.loads(v) - except: - f = v - - p[k] = f - - return p diff --git a/prospect/io/write_results.py b/prospect/io/write_results.py index d8d7a7bc..765e41b3 100644 --- a/prospect/io/write_results.py +++ b/prospect/io/write_results.py @@ -15,7 +15,7 @@ _has_h5py_ = False -__all__ = ["githash", "write_pickles", "write_hdf5", +__all__ = ["githash", "write_hdf5", "chain_to_struct"] @@ -28,17 +28,6 @@ def pick(obj): return np.void(pickle.dumps(obj, 0)) -#def run_command(cmd): -# """Open a child process, and return its exit status and stdout. -# """ -# import subprocess -# child = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE, -# stdin=subprocess.PIPE, stdout=subprocess.PIPE) -# out = [s for s in child.stdout] -# w = child.wait() -# return os.WEXITSTATUS(w), out - - def githash(**extras): """Pull out the git hash history for Prospector here. """ @@ -62,10 +51,14 @@ def paramfile_string(param_file=None, **extras): return pstr -def write_hdf5(hfile, run_params, model, obs, sampler=None, - optimize_result_list=None, tsample=0.0, toptimize=0.0, - sampling_initial_center=[], sps=None, write_model_params=True, - **extras): + +def write_hdf5(hfile, run_params, model, obs, + sampler=None, + optimize_result_list=None, + tsample=0.0, toptimize=0.0, + sampling_initial_center=[], + write_model_params=True, + sps=None, **extras): """Write output and information to an HDF5 file object (or group). 
@@ -94,55 +87,38 @@ def write_hdf5(hfile, run_params, model, obs, sampler=None, If a `prospect.sources.SSPBasis` object is supplied, it will be used to generate and store """ - - if not _has_h5py_: - warnings.warn("HDF5 file could not be opened, as h5py could not be imported.") - return - # If ``hfile`` is not a file object, assume it is a filename and open if type(hfile) is str: - # Check for existence of file, modify name if it already exists - if os.path.exists(hfile): - import time - time_string = time.strftime("%y%b%d-%H.%M", time.localtime()) - print("Appending current time ({0}) to output file ".format(time_string) + \ - "in order to guarantee a unique name.") - name, ext = os.path.splitext(hfile) - hfile = name+'_{0}'.format(time_string)+ext - print("New output filename: {0}".format(hfile)) - - hf = h5py.File(hfile, "a") + hf = h5py.File(hfile, "w") else: hf = hfile # ---------------------- # Sampling info - try: - # emcee - a = sampler.acceptance_fraction - write_emcee_h5(hf, sampler, model, sampling_initial_center, tsample) - except(AttributeError): - # dynesty or nestle - if sampler is None: - sdat = hf.create_group('sampling') - elif 'eff' in sampler: - write_dynesty_h5(hf, sampler, model, tsample) - else: - write_nestle_h5(hf, sampler, model, tsample) + if run_params.get("emcee", False): + chain, extras = emcee_to_struct(sampler, model) + elif run_params.get("dynesty", False): + chain, extras = dynesty_to_struct(sampler, model) + else: + chain, extras = None, None + write_sampling_h5(hf, chain, extras) + hf.flush() + + # ---------------------- + # High level parameter and version info + meta = metadata(run_params, model, write_model_params=write_model_params) + for k, v in meta.items(): + hf.attrs[k] = k + hf.flush() # ----------------- # Optimizer info + hf.attrs['optimizer_duration'] = json.dumps(toptimize) if optimize_result_list is not None: out = optresultlist_to_ndarray(optimize_result_list) mgroup = hf.create_group('optimization') mdat = 
mgroup.create_dataset('optimizer_results', data=out) - # ---------------------- - # High level parameter and version info - write_h5_header(hf, run_params, model, write_model_params=write_model_params) - hf.attrs['optimizer_duration'] = json.dumps(toptimize) - hf.flush() - # ---------------------- # Observational data write_obs_to_h5(hf, obs) @@ -170,143 +146,70 @@ def write_hdf5(hfile, run_params, model, obs, sampler=None, hf.close() -def write_emcee_h5(hf, sampler, model, sampling_initial_center, tsample): - """Write emcee information to the provided HDF5 file in the `sampling` - group. - """ - try: - sdat = hf['sampling'] - except(KeyError): - sdat = hf.create_group('sampling') - if 'chain' not in sdat: - sdat.create_dataset('chain', - data=sampler.chain) - lnp = sampler.lnprobability - if ((lnp.shape[0] != lnp.shape[1]) & - (lnp.T.shape == sampler.chain.shape[:-1])): - # hack to deal with emcee3rc lnprob transposition - lnp = lnp.T - sdat.create_dataset('lnprobability', data=lnp) - sdat.create_dataset('acceptance', - data=sampler.acceptance_fraction) - sdat.create_dataset('sampling_initial_center', - data=sampling_initial_center) - sdat.create_dataset('initial_theta', - data=model.initial_theta.copy()) - # JSON Attrs - sdat.attrs['rstate'] = pick(sampler.random_state) - sdat.attrs['sampling_duration'] = json.dumps(tsample) - sdat.attrs['theta_labels'] = json.dumps(list(model.theta_labels())) +def metadata(run_params, model, write_model_params=True): + meta = dict(run_params=run_params, + paramfile_text=paramfile_string(**run_params)) + if write_model_params: + from copy import deepcopy + meta["model_params"] = deepcopy(model.params) + for k, v in list(meta.items()): + try: + meta[k] = json.dumps(v) + except(TypeError): + meta[k] = pick(v) + except: + meta[k] = unserial - hf.flush() + return meta -def write_nestle_h5(hf, nestle_out, model, tsample): - """Write nestle results to the provided HDF5 file in the `sampling` group. 
- """ - try: - sdat = hf['sampling'] - except(KeyError): - sdat = hf.create_group('sampling') - sdat.create_dataset('chain', - data=nestle_out['samples']) - sdat.create_dataset('weights', - data=nestle_out['weights']) - sdat.create_dataset('lnlikelihood', - data=nestle_out['logl']) - sdat.create_dataset('lnprobability', - data=(nestle_out['logl'] + - model.prior_product(nestle_out['samples']))) - sdat.create_dataset('logvol', - data=nestle_out['logvol']) - sdat.create_dataset('logz', - data=np.atleast_1d(nestle_out['logz'])) - sdat.create_dataset('logzerr', - data=np.atleast_1d(nestle_out['logzerr'])) - sdat.create_dataset('h_information', - data=np.atleast_1d(nestle_out['h'])) - - # JSON Attrs - for p in ['niter', 'ncall']: - sdat.attrs[p] = json.dumps(nestle_out[p]) - sdat.attrs['theta_labels'] = json.dumps(list(model.theta_labels())) - sdat.attrs['sampling_duration'] = json.dumps(tsample) +def emcee_to_struct(sampler, model): + # preamble + samples = sampler.get_chain(flat=True) + lnprior = model.prior_product(samples) - hf.flush() + # chaincat & extras + chaincat = chain_to_struct(samples, model=model) + extras = dict(weights=None, + lnprobability=sampler.get_log_prob(flat=True), + lnlike=sampler.get_log_prob(flat=True) - lnprior, + acceptance=sampler.acceptance_fraction, + rstate=sampler.random_state) + return chaincat, extras -def write_dynesty_h5(hf, dynesty_out, model, tsample): - """Write nestle results to the provided HDF5 file in the `sampling` group. 
- """ - try: - sdat = hf['sampling'] - except(KeyError): - sdat = hf.create_group('sampling') - sdat.create_dataset('chain', - data=dynesty_out['samples']) - sdat.create_dataset('weights', - data=np.exp(dynesty_out['logwt']-dynesty_out['logz'][-1])) - sdat.create_dataset('logvol', - data=dynesty_out['logvol']) - sdat.create_dataset('logz', - data=np.atleast_1d(dynesty_out['logz'])) - sdat.create_dataset('logzerr', - data=np.atleast_1d(dynesty_out['logzerr'])) - sdat.create_dataset('information', - data=np.atleast_1d(dynesty_out['information'])) - sdat.create_dataset('lnlikelihood', - data=dynesty_out['logl']) - sdat.create_dataset('lnprobability', - data=(dynesty_out['logl'] + - model.prior_product(dynesty_out['samples']))) - sdat.create_dataset('efficiency', - data=np.atleast_1d(dynesty_out['eff'])) - sdat.create_dataset('niter', - data=np.atleast_1d(dynesty_out['niter'])) - sdat.create_dataset('samples_id', - data=np.atleast_1d(dynesty_out['samples_id'])) - - # JSON Attrs - sdat.attrs['ncall'] = json.dumps(dynesty_out['ncall'].tolist()) - sdat.attrs['theta_labels'] = json.dumps(list(model.theta_labels())) - sdat.attrs['sampling_duration'] = json.dumps(tsample) +def dynesty_to_struct(dyout, model): + # preamble + lnprior = model.prior_product(dyout['samples']) - hf.flush() + # chaincat & extras + chaincat = chain_to_struct(dyout["samples"], model=model) + extras = dict(weights=np.exp(dyout['logwt']-dyout['logz'][-1]), + lnprobability=dyout['logl'] + lnprior, + lnlike=dyout['logl'], + efficiency=np.atleast_1d(dyout['eff']), + logz=np.atleast_1d(dyout['logz']), + ncall=json.dumps(dyout['ncall'].tolist()) + ) + return chaincat, extras -def write_h5_header(hf, run_params, model, write_model_params=True): - """Write header information about the run. 
- """ - serialize = {'run_params': run_params, - 'model_params': [functions_to_names(p.copy()) - for p in model.config_list], - 'paramfile_text': paramfile_string(**run_params)} +def write_sampling_h5(hf, chain, extras): try: - hf.attrs['model_params'] = pick(serialize['model_params']) - except: - serialize['model_params'] = None - - if not write_model_params: - serialize = {'run_params': run_params, - 'model_params': None, - 'paramfile_text': paramfile_string(**run_params)} + sdat = hf['sampling'] + except(KeyError): + sdat = hf.create_group('sampling') - for k, v in list(serialize.items()): + sdat.create_dataset('chain', data=chain) + for k, v in extras.items(): try: - hf.attrs[k] = json.dumps(v) #, cls=NumpyEncoder) - except(TypeError): - # Should this fall back to pickle.dumps? - hf.attrs[k] = pick(v) - warnings.warn("Could not JSON serialize {}, pickled instead".format(k), - RuntimeWarning) + sdat.create_dataset(k, data=v) except: - hf.attrs[k] = unserial - warnings.warn("Could not serialize {}".format(k), RuntimeWarning) - hf.flush() + sdat.attrs[k] = v -def write_obs_to_h5(hf, obs): +def write_obs_to_h5(hf, obslist): """Write observational data to the hdf5 file """ try: @@ -314,25 +217,8 @@ def write_obs_to_h5(hf, obs): except(ValueError): # We already have an 'obs' group return - for k, v in list(obs.items()): - if k == 'filters': - try: - v = [f.name for f in v] - except: - pass - if isinstance(v, np.ndarray): - odat.create_dataset(k, data=v) - else: - try: - odat.attrs[k] = json.dumps(v) #, cls=NumpyEncoder) - except(TypeError): - # Should this fall back to pickle.dumps? 
- odat.attrs[k] = pick(v) - warnings.warn("Could not JSON serialize {}, pickled instead".format(k)) - except: - odat.attrs[k] = unserial - warnings.warn("Could not serialize {}".format(k)) - + for obs in obslist: + obs.to_h5_dataset(odat) hf.flush() @@ -348,18 +234,28 @@ def optresultlist_to_ndarray(results): return out -def chain_to_struct(chain, model=None, names=None): +def chain_to_struct(chain, model=None, names=None, **extras): """Given a (flat)chain (or parameter dictionary) and a model, convert the chain to a structured array - :param chain: - A chain, ndarry of shape (nsamples, ndim) or a dictionary of - parameters, values of which are numpy datatypes. + Parameters + ---------- + chain : ndarry of shape (nsamples, ndim) + A chain or a dictionary of parameters, values of which are numpy + datatypes. - :param model: - A ProspectorParams instance + model : A ProspectorParams instance + + names : list of strings - :returns struct: + extras : optional + Extra keyword arguments are assumed to be 1d ndarrays of type np.float64 + and shape (nsamples,) that will be added as additional fields of the + output structure + + Returns + ------- + struct : A structured ndarray of parameter values. 
""" indict = type(chain) == dict @@ -377,6 +273,8 @@ def chain_to_struct(chain, model=None, names=None): else: dt = [(str(p), " Date: Sat, 18 Jun 2022 19:39:17 -0400 Subject: [PATCH 11/33] explicitly set output wavelength to sps when no input wavelength; assume FilterSet; require sedpy >= 0.3 --- prospect/models/sedmodel.py | 28 ++++++++++++++++------------ requirements.txt | 2 +- tests/test_predict.py | 16 +++++++++++----- 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index bb16073e..eaf2cd81 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -126,7 +126,7 @@ def predict_obs(self, obs): if obs.kind == "spectrum": prediction = self.predict_spec(obs) elif obs.kind == "photometry": - prediction = self.predict_phot(obs["filters"]) + prediction = self.predict_phot(obs.filterset) return prediction def predict_spec(self, obs, **extras): @@ -169,17 +169,20 @@ def predict_spec(self, obs, **extras): including multiplication by the calibration vector. ndarray of shape ``(nwave,)`` in units of maggies. """ - self._outwave = obs['wavelength'] - # redshift model wavelength obs_wave = self.observed_wave(self._wave, do_wavecal=False) + # get output wavelength vector + self._outwave = obs.wavelength + if self._outwave is None: + self._outwave = obs_wave + # Set up for emission lines self.cache_eline_parameters(obs) # --- smooth and put on output wavelength grid --- # physical smoothing - smooth_spec = self.smoothspec(obs_wave, self._norm_spec) + smooth_spec = self.velocity_smoothing(obs_wave, self._norm_spec) # instrumental smoothing (accounting for library resolution) smooth_spec = obs.instrumental_smoothing(self._outwave, smooth_spec, libres=self._library_resolution) @@ -213,7 +216,7 @@ def predict_spec(self, obs, **extras): return calibrated_spec - def predict_phot(self, filters): + def predict_phot(self, filterset): """Generate a prediction for the observed photometry. 
This method assumes that the parameters have been set and that the following attributes are present and correct: @@ -239,11 +242,11 @@ def predict_phot(self, filters): # generate photometry w/o emission lines obs_wave = self.observed_wave(self._wave, do_wavecal=False) flambda = self._norm_spec * lightspeed / obs_wave**2 * (3631*jansky_cgs) - phot = np.atleast_1d(getSED(obs_wave, flambda, filters, linear_flux=True)) + phot = np.atleast_1d(getSED(obs_wave, flambda, filterset, linear_flux=True)) # generate emission-line photometry if (self._want_lines & self._need_lines): - phot += self.nebline_photometry(filters) + phot += self.nebline_photometry(filterset) return phot @@ -289,7 +292,7 @@ def _need_lines(self): def _want_lines(self): return bool(self.params.get('add_neb_emission', False)) - def nebline_photometry(self, filters, elams=None, elums=None): + def nebline_photometry(self, filterset, elams=None, elums=None): """Compute the emission line contribution to photometry. This requires several cached attributes: + ``_ewave_obs`` @@ -320,11 +323,11 @@ def nebline_photometry(self, filters, elams=None, elums=None): elums = self._eline_lum[self._use_eline] * self.line_norm # loop over filters - flux = np.zeros(len(filters)) + flux = np.zeros(len(filterset)) try: # TODO: Since in this case filters are on a grid, there should be a # faster way to look up the transmission than the later loop - flist = filters.filters + flist = filterset.filters except(AttributeError): flist = filters for i, filt in enumerate(flist): @@ -578,12 +581,13 @@ def get_eline_gaussians(self, lineidx=slice(None), wave=None): return eline_gaussians - def smoothspec(self, wave, spec): + def velocity_smoothing(self, wave, spec): """Smooth the spectrum. See :py:func:`prospect.utils.smoothing.smoothspec` for details. 
""" sigma = self.params.get("sigma_smooth", 100) - outspec = smoothspec(wave, spec, sigma, outwave=self._outwave, **self.params) + outspec = smoothspec(wave, spec, sigma, outwave=self._outwave, + smoothtype="vel", fft=True) return outspec diff --git a/requirements.txt b/requirements.txt index 8947ebd2..0226c015 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ numpy >= 1.14.2 scipy >= 1.1.0 astropy h5py -astro-sedpy +astro-sedpy >= 0.3.0 six \ No newline at end of file diff --git a/tests/test_predict.py b/tests/test_predict.py index dacbc93b..2d36db8e 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -12,7 +12,7 @@ from prospect.data import Spectrum, Photometry -@pytest.fixture(scope="module") +#@pytest.fixture(scope="module") def build_sps(): sps = CSPSpecBasis(zcontinuous=1) return sps @@ -47,12 +47,18 @@ def build_obs(multispec=True): return obslist -@pytest.mark.skip(reason="not ready") -def xtest_prediction_nodata(build_sps): +#@pytest.mark.skip(reason="not ready") +def test_prediction_nodata(build_sps): + sps = build_sps + model = build_model(add_neb=True) sobs, pobs = build_obs(multispec=False) pobs.flux = None pobs.uncertainty = None - pred, mfrac = model.predict(model.theta, observations=[pobs], sps=sps) + sobs.wavelength = None + sobs.flux = None + sobs.uncertainty = None + pred, mfrac = model.predict(model.theta, observations=[sobs, pobs], sps=sps) + assert len(pred[0]) == len(sps.wavelengths) def test_multispec(build_sps): @@ -94,4 +100,4 @@ def lnlike_testing(build_sps): #%timeit model.prior_product(model.theta) #%timeit predictions, x = model.predict(model.theta + np.random.uniform(0, 3) * arr, observations=obslist, sps=sps) #%timeit lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs in zip(predictions, observations)] - #%timeit lnp = lnprobfn(model.theta + np.random.uniform(0, 3) * arr, model=model, observations=obslist, sps=sps) \ No newline at end of file + #%timeit lnp = lnprobfn(model.theta + 
np.random.uniform(0, 3) * arr, model=model, observations=obslist, sps=sps) From 4e0017fcd9542b8dc51184382f3747e1020e63b7 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sat, 18 Jun 2022 19:57:33 -0400 Subject: [PATCH 12/33] Fix bugs and tests; allow use of list of Filters or FilterSet. --- prospect/data/observation.py | 17 +++++++++++++---- prospect/models/sedmodel.py | 20 +++++--------------- tests/test_eline.py | 34 +++++++++++++++++++++------------- tests/test_predict.py | 4 ++-- 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/prospect/data/observation.py b/prospect/data/observation.py index ac301e52..f82ce886 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -197,6 +197,16 @@ def __init__(self, filters=[], name="PhotA", **kwargs): name : string, optional The name for this set of data """ + self.set_filters(filters) + super(Photometry, self).__init__(name=name, **kwargs) + + def set_filters(self, filters): + if not filters: + self.filters = filters + self.filternames = [] + self.filterset = None + return + if type(filters[0]) is str: self.filternames = filters else: @@ -206,8 +216,6 @@ def __init__(self, filters=[], name="PhotA", **kwargs): # filters on the gridded resolution self.filters = [f for f in self.filterset.filters] - super(Photometry, self).__init__(name=name, **kwargs) - @property def wavelength(self): return np.array([f.wave_effective for f in self.filters]) @@ -354,7 +362,8 @@ def __init__(self, def from_oldstyle(obs, **kwargs): """Convert from an oldstyle dictionary to a list of observations """ - obslist = [Spectrum(**obs), Photometry(**obs)] + spec, phot = Spectrum(**obs), Photometry(**obs) + #phot.set_filters(phot.filters) #[o.rectify() for o in obslist] - return obslist + return [spec, phot] diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index eaf2cd81..4124449f 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -236,7 +236,7 @@ def 
predict_phot(self, filterset): ndarray of shape ``(len(filters),)``, in units of maggies. If ``filters`` is None, this returns 0.0 """ - if filters is None: + if filterset is None: return 0.0 # generate photometry w/o emission lines @@ -329,7 +329,7 @@ def nebline_photometry(self, filterset, elams=None, elums=None): # faster way to look up the transmission than the later loop flist = filterset.filters except(AttributeError): - flist = filters + flist = filterset for i, filt in enumerate(flist): # calculate transmission at line wavelengths trans = np.interp(elams, filt.wavelength, filt.transmission, @@ -616,7 +616,7 @@ def wave_to_x(self, wavelength=None, mask=slice(None), **extras): def spec_calibration(self, **kwargs): return np.ones_like(self._outwave) - def absolute_rest_maggies(self, filters): + def absolute_rest_maggies(self, filterset): """Return absolute rest-frame maggies (=10**(-0.4*M)) of the last computed spectrum. @@ -638,28 +638,18 @@ def absolute_rest_maggies(self, filters): fmaggies = self._norm_spec / (1 + self._zred) * (ld / 10)**2 # convert to erg/s/cm^2/AA for sedpy and get absolute magnitudes flambda = fmaggies * lightspeed / self._wave**2 * (3631*jansky_cgs) - abs_rest_maggies = 10**(-0.4 * np.atleast_1d(getSED(self._wave, flambda, filters))) - # TODO: below is faster for sedpy > 0.2.0 - #abs_rest_maggies = np.atleast_1d(getSED(self._wave, flambda, filters, linear_flux=True)) + abs_rest_maggies = np.atleast_1d(getSED(self._wave, flambda, filterset, linear_flux=True)) # add emission lines if bool(self.params.get('nebemlineinspec', False)) is False: eline_z = self.params.get("eline_delta_zred", 0.0) elams = (1 + eline_z) * self._eline_wave elums = self._eline_lum * self.flux_norm() / (1 + self._zred) * (3631*jansky_cgs) * (ld / 10)**2 - emaggies = self.nebline_photometry(filters, elams=elams, elums=elums) + emaggies = self.nebline_photometry(filterset, elams=elams, elums=elums) abs_rest_maggies += emaggies return abs_rest_maggies - def 
mean_model(self, theta, obs, sps=None, sigma=None, **extras): - """Legacy wrapper around predict() - """ - from ..utils.observation import from_oldstyle - obslist = from_oldstyle(obs) - predictions, mfrac = self.predict(theta, obslist, sps=sps, sigma_spec=sigma, **extras) - return predictions[0], predictions[1], mfrac - class PolySpecModel(SpecModel): diff --git a/tests/test_eline.py b/tests/test_eline.py index a25f1285..64c409c1 100644 --- a/tests/test_eline.py +++ b/tests/test_eline.py @@ -3,16 +3,22 @@ import numpy as np +import pytest + from sedpy import observate -from prospect import prospect_args from prospect.data import Photometry, Spectrum, from_oldstyle from prospect.models.templates import TemplateLibrary from prospect.models.sedmodel import SpecModel - from prospect.sources import CSPSpecBasis +@pytest.fixture +def get_sps(): + sps = CSPSpecBasis(zcontinuous=1) + return sps + + # test nebular line specification def test_eline_parsing(): model_pars = TemplateLibrary["parametric_sfh"] @@ -53,7 +59,7 @@ def test_eline_parsing(): assert model._fit_eline.sum() == (len(model._use_eline) - len(fix_lines)) -def test_nebline_phot_addition(): +def test_nebline_phot_addition(get_sps): fnames = [f"sdss_{b}0" for b in "ugriz"] filts = observate.load_filters(fnames) @@ -61,9 +67,10 @@ def test_nebline_phot_addition(): wavelength=np.linspace(3000, 9000, 1000), spectrum=np.ones(1000), unc=np.ones(1000)*0.1) - obslist = from_oldstyle(obs) + sdat, pdat = from_oldstyle(obs) + obslist = [sdat, pdat] - sps = CSPSpecBasis(zcontinuous=1) + sps = get_sps # Make emission lines more prominent zred = 1.0 @@ -88,11 +95,11 @@ def test_nebline_phot_addition(): p1n = m1.nebline_photometry(filts) assert np.any(p1n / p1[1] > 0.05) - # make sure you got the same answer - assert np.all(np.abs(p1 - p2) / p1 < 1e-3) + # make sure you got the same-ish answer + assert np.all((np.abs(p1 - p2) / p1) < 1e-2) -def test_filtersets(): +def test_filtersets(get_sps): """This test no longer 
relevant..... """ fnames = [f"sdss_{b}0" for b in "ugriz"] @@ -102,9 +109,10 @@ def test_filtersets(): spectrum=np.ones(1000), unc=np.ones(1000)*0.1, filters=fnames) - obslist = from_oldstyle(obs) + sdat, pdat = from_oldstyle(obs) + obslist = [sdat, pdat] - sps = CSPSpecBasis(zcontinuous=1) + sps = get_sps # Make emission lines more prominent zred = 0.5 @@ -128,7 +136,7 @@ def test_filtersets(): # make sure some of the filters are affected by lines # ( nebular flux > 10% of total flux) if i == 1: - nebphot = model.nebline_photometry(flist) + nebphot = model.nebline_photometry(pdat.filterset) assert np.any(nebphot / pset > 0.1) # make sure photometry is consistent @@ -136,7 +144,7 @@ def test_filtersets(): # We always use filtersets now -def test_eline_implementation(): +def test_eline_implementation(get_sps): test_eline_parsing() @@ -156,7 +164,7 @@ def test_eline_implementation(): model_pars["zred"]["init"] = 4 model = SpecModel(model_pars) - sps = CSPSpecBasis(zcontinuous=1) + sps = get_sps # generate with all fixed lines added (spec, phot), mfrac = model.predict(model.theta, obslist, sps=sps) diff --git a/tests/test_predict.py b/tests/test_predict.py index 2d36db8e..73ff5c16 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -12,7 +12,7 @@ from prospect.data import Spectrum, Photometry -#@pytest.fixture(scope="module") +@pytest.fixture(scope="module") def build_sps(): sps = CSPSpecBasis(zcontinuous=1) return sps @@ -47,7 +47,6 @@ def build_obs(multispec=True): return obslist -#@pytest.mark.skip(reason="not ready") def test_prediction_nodata(build_sps): sps = build_sps model = build_model(add_neb=True) @@ -59,6 +58,7 @@ def test_prediction_nodata(build_sps): sobs.uncertainty = None pred, mfrac = model.predict(model.theta, observations=[sobs, pobs], sps=sps) assert len(pred[0]) == len(sps.wavelengths) + assert len(pred[1]) == len(pobs.filterset) def test_multispec(build_sps): From 0a9c61dc3210b5c9cc29aa1d35db4d17248a1966 Mon Sep 17 00:00:00 2001 
From: Benjamin Johnson Date: Wed, 27 Jul 2022 15:30:20 -0400 Subject: [PATCH 13/33] Simplification of prospect.sources Moves the SSPBasis and FastStepBasis to galaxy_basis module. Removes the ssp_basis, dust_basis, and boneyard modules. Removes the get_spectrum method from SSPBasis and subclasses; this is handled by SpecModel Remove various multicomponent hacks. --- prospect/sources/__init__.py | 12 +- prospect/sources/boneyard.py | 487 ------------------------------- prospect/sources/dust_basis.py | 104 ------- prospect/sources/galaxy_basis.py | 395 ++++++++++++++----------- prospect/sources/ssp_basis.py | 403 ------------------------- 5 files changed, 232 insertions(+), 1169 deletions(-) delete mode 100644 prospect/sources/boneyard.py delete mode 100644 prospect/sources/dust_basis.py delete mode 100644 prospect/sources/ssp_basis.py diff --git a/prospect/sources/__init__.py b/prospect/sources/__init__.py index 7f7bd353..e4bccd8c 100644 --- a/prospect/sources/__init__.py +++ b/prospect/sources/__init__.py @@ -1,12 +1,4 @@ from .galaxy_basis import * -from .ssp_basis import * -from .star_basis import * -from .dust_basis import * -from .boneyard import StepSFHBasis -__all__ = ["to_cgs", - "CSPSpecBasis", "MultiComponentCSPBasis", - "FastSSPBasis", "SSPBasis", - "FastStepBasis", "StepSFHBasis", - "StarBasis", "BigStarBasis", - "BlackBodyDustBasis"] +__all__ = ["CSPSpecBasis", "SSPBasis", + "FastStepBasis"] diff --git a/prospect/sources/boneyard.py b/prospect/sources/boneyard.py deleted file mode 100644 index e090771a..00000000 --- a/prospect/sources/boneyard.py +++ /dev/null @@ -1,487 +0,0 @@ -import numpy as np -from scipy.special import expi, gammainc - -from .ssp_basis import SSPBasis - - -__all__ = ["CSPBasis", "StepSFHBasis", "CompositeSFH", "LinearSFHBasis"] - -# change base -from .constants import loge - - -class CSPBasis(object): - """ - A class for composite stellar populations, which can be composed from - multiple versions of parameterized SFHs. 
Deprecated, Use CSPSpecBasis instead. - """ - def __init__(self, compute_vega_mags=False, zcontinuous=1, vactoair_flag=False, **kwargs): - - # This is a StellarPopulation object from fsps - self.csp = fsps.StellarPopulation(compute_vega_mags=compute_vega_mags, - zcontinuous=zcontinuous, - vactoair_flag=vactoair_flag) - self.params = {} - - def get_spectrum(self, outwave=None, filters=None, peraa=False, **params): - """Given a theta vector, generate spectroscopy, photometry and any - extras (e.g. stellar mass). - - :param theta: - ndarray of parameter values. - - :param sps: - A python-fsps StellarPopulation object to be used for - generating the SED. - - :returns spec: - The restframe spectrum in units of maggies. - - :returns phot: - The apparent (redshifted) observed frame maggies in each of the - filters. - - :returns extras: - A list of the ratio of existing stellar mass to total mass formed - for each component, length ncomp. - """ - self.params.update(**params) - # Pass the model parameters through to the sps object - ncomp = len(self.params['mass']) - for ic in range(ncomp): - s, p, x = self.one_sed(component_index=ic, filterlist=filters) - try: - spec += s - maggies += p - extra += [x] - except(NameError): - spec, maggies, extra = s, p, [x] - # `spec` is now in Lsun/Hz, with the wavelength array being the - # observed frame wavelengths. Flux array (and maggies) have not been - # increased by (1+z) due to cosmological redshift - - w = self.ssp.wavelengths - if outwave is not None: - spec = np.interp(outwave, w, spec) - else: - outwave = w - # Distance dimming and unit conversion - zred = self.params.get('zred', 0.0) - if (zred == 0) or ('lumdist' in self.params): - # Use 10pc for the luminosity distance (or a number provided in the - # lumdist key in units of Mpc). Do not apply cosmological (1+z) - # factor to the flux. 
- dfactor = (self.params.get('lumdist', 1e-5) * 1e5)**2 - a = 1.0 - else: - # Use the comsological luminosity distance implied by this - # redshift. Cosmological (1+z) factor on the flux was already done in one_sed - lumdist = cosmo.luminosity_distance(zred).value - dfactor = (lumdist * 1e5)**2 - if peraa: - # spectrum will be in erg/s/cm^2/AA - spec *= to_cgs / dfactor * lightspeed / outwave**2 - else: - # Spectrum will be in maggies - spec *= to_cgs / dfactor / (3631*jansky_cgs) - - # Convert from absolute maggies to apparent maggies - maggies /= dfactor - - return spec, maggies, extra - - def one_sed(self, component_index=0, filterlist=[]): - """Get the SED of one component for a multicomponent composite SFH. - Should set this up to work as an iterator. - - :param component_index: - Integer index of the component to calculate the SED for. - - :param filterlist: - A list of strings giving the (FSPS) names of the filters onto which - the spectrum will be projected. - - :returns spec: - The restframe spectrum in units of Lsun/Hz. - - :returns maggies: - Broadband fluxes through the filters named in ``filterlist``, - ndarray. Units are observed frame absolute maggies: M = -2.5 * - log_{10}(maggies). - - :returns extra: - The extra information corresponding to this component. - """ - # Pass the model parameters through to the sps object, and keep track - # of the mass of this component - mass = 1.0 - for k, vs in list(self.params.items()): - try: - v = vs[component_index] - except(IndexError, TypeError): - v = vs - if k in self.csp.params.all_params: - self.csp.params[k] = deepcopy(v) - if k == 'mass': - mass = v - # Now get the spectrum. The spectrum is in units of - # Lsun/Hz/per solar mass *formed*, and is restframe - w, spec = self.csp.get_spectrum(tage=self.csp.params['tage'], peraa=False) - # redshift and get photometry. 
Note we are boosting fnu by (1+z) *here* - a, b = (1 + self.csp.params['zred']), 0.0 - wa, sa = w * (a + b), spec * a # Observed Frame - if filterlist is not None: - mags = getSED(wa, lightspeed/wa**2 * sa * to_cgs, filterlist) - phot = np.atleast_1d(10**(-0.4 * mags)) - else: - phot = 0.0 - - # now some mass normalization magic - mfrac = self.csp.stellar_mass - if np.all(self.params.get('mass_units', 'mstar') == 'mstar'): - # Convert input normalization units from per stellar masss to per mass formed - mass /= mfrac - # Output correct units - return mass * sa, mass * phot, mfrac - - -class StepSFHBasis(SSPBasis): - """Subclass of SSPBasis that computes SSP weights for piecewise constant - SFHs (i.e. a binned SFH). The parameters for this SFH are: - - * `agebins` - array of shape (nbin, 2) giving the younger and older (in - lookback time) edges of each bin. If `interp_type` is `"linear"', - these are assumed to be in years. Otherwise they are in log10(years) - - * `mass` - array of shape (nbin,) giving the total surviving stellar mass - (in solar masses) in each bin, unless the `mass_units` parameter is set - to something different `"mstar"`, in which case the units are assumed - to be total stellar mass *formed* in each bin. - - The `agebins` parameter *must not be changed* without also setting - `self._ages=None`. - """ - - @property - def all_ssp_weights(self): - # Cache age bins and relative weights. This means params['agebins'] - # *must not change* without also setting _ages = None - if getattr(self, '_ages', None) is None: - self._ages = self.params['agebins'] - nbin, nssp = len(self._ages), len(self.logage) + 1 - self._bin_weights = np.zeros([nbin, nssp]) - for i, (t1, t2) in enumerate(self._ages): - # These *should* sum to one (or zero) for each bin - self._bin_weights[i, :] = self.bin_weights(t1, t2) - - # Now normalize the weights in each bin by the mass parameter, and sum - # over bins. 
- bin_masses = self.params['mass'] - if np.all(self.params.get('mass_units', 'mformed') == 'mstar'): - # Convert from mstar to mformed for each bin. We have to do this - # here as well as in get_spectrum because the *relative* - # normalization in each bin depends on the units, as well as the - # overall normalization. - bin_masses /= self.bin_mass_fraction - w = (bin_masses[:, None] * self._bin_weights).sum(axis=0) - - return w - - @property - def bin_mass_fraction(self): - """Return the ratio m_star(surviving) / m_formed for each bin. - """ - try: - mstar = self.ssp_stellar_masses - w = self._bin_weights - bin_mfrac = (mstar[None, :] * w).sum(axis=-1) / w.sum(axis=-1) - return bin_mfrac - except(AttributeError): - print('agebin info or ssp masses not chached?') - return 1.0 - - def bin_weights(self, amin, amax): - """Compute normalizations required to get a piecewise constant SFH - within an age bin. This is super complicated and obscured. The output - weights are such that one solar mass will have formed during the bin - (i.e. SFR = 1/(amax-amin)) - - This computes weights using \int_tmin^tmax dt (\log t_i - \log t) / - (\log t_{i+1} - \log t_i) but see sfh.tex for the detailed calculation - and the linear time interpolation case. 
- """ - if self.interp_type == 'linear': - sspages = np.insert(10**self.logage, 0, 0) - func = constant_linear - mass = amax - amin - elif self.interp_type == 'logarithmic': - sspages = np.insert(self.logage, 0, self.mint_log) - func = constant_logarithmic - mass = 10**amax - 10**amin - - assert amin >= sspages[0] - assert amax <= sspages.max() - - # below could be done by using two separate dt vectors instead of two - # age vectors - ages = np.array([sspages[:-1], sspages[1:]]) - dt = np.diff(ages, axis=0) - tmin, tmax = np.clip(ages, amin, amax) - - # get contributions from SSP sub-bin to the left and from SSP sub-bin - # to the right - left, right = (func(ages, tmax) - func(ages, tmin)) / dt - # put into full array - ww = np.zeros(len(sspages)) - ww[:-1] += right # last element has no sub-bin to the right - ww[1:] += -left # need to flip sign - - # normalize to 1 solar mass formed and return - return ww / mass - - -class CompositeSFH(SSPBasis): - """Subclass of SSPBasis that computes SSP weights for a parameterized SF. - The parameters for this SFH are: - - * `sfh_type` - String of "delaytau", "tau", "simha" - - * `tage`, `sf_trunc`, `sf_slope`, `const`, `fburst`, `tau` - - * `mass` - - - """ - - def configure(self): - """This reproduces FSPS-like combinations of SFHs. 
Note that the - *same* parameter set is passed to each component in the combination - """ - sfhs = [self.sfh_type] - limits = len(sfhs) * ['regular'] - if 'simha' in self.sfh_type: - sfhs = ['delaytau', 'linear'] - limits = ['regular', 'simha'] - - fnames = ['{0}_{1}'.format(f, self.interp_type) for f in sfhs] - lnames = ['{}_limits'.format(f) for f in limits] - self.funcs = [globals()[f] for f in fnames] - self.limits = [globals()[f] for f in lnames] - - if self.interp_type == 'linear': - sspages = np.insert(10**self.logage, 0, 0) - elif self.interp_type == 'logarithmic': - sspages = np.insert(self.logage, 0, self.mint_log) - self.ages = np.array([sspages[:-1], sspages[1:]]) - self.dt = np.diff(self.ages, axis=0) - - @property - def _limits(self): - pass - - @property - def _funcs(self): - pass - - @property - def all_ssp_weights(self): - - # Full output weight array. We keep separate vectors for each - # component so we can renormalize after the loop, but for many - # components it would be better to renormalize and sum within the loop - ww = np.zeros([len(self.funcs), self.ages.shape[-1] + 1]) - - # Loop over components. 
Note we are sending the same params to every component - for i, (limit, func) in enumerate(zip(self.limits, self.funcs)): - ww[i, :] = self.ssp_weights(func, limit, self.params) - - # renormalize each component to 1 Msun - assert np.all(ww >= 0) - wsum = ww.sum(axis=1) - # unless truly no SF in the component - if 0 in wsum: - wsum[wsum == 0] = 1.0 - ww /= wsum[:, None] - # apply relative normalizations - ww *= self.normalizations(**self.params)[:, None] - # And finally add all components together and renormalize again to - # 1Msun and return - return ww.sum(axis=0) / ww.sum() - - def ssp_weights(self, integral, limit_function, params, **extras): - # build full output weight vector - ww = np.zeros(self.ages.shape[-1] + 1) - tmin, tmax = limit_function(self.ages, mint_log=self.mint_log, - interp_type=self.interp_type, **params) - left, right = (integral(self.ages, tmax, **params) - - integral(self.ages, tmin, **params)) / self.dt - # Put into full array, shifting the `right` terms by 1 element - ww[:-1] += right # last SSP has no sub-bin to the right - ww[1:] += -left # need to flip sign - - # Note that now ww[i,1] = right[1] - left[0], where - # left[0] is the integral from tmin,0 to tmax,0 of - # SFR(t) * (sspages[0] - t)/(sspages[1] - sspages[0]) and - # right[1] is the integral from tmin,1 to tmax,1 of - # SFR(t) * (sspages[2] - t)/(sspages[2] - sspages[1]) - return ww - - def normalizations(self, tage=0., sf_trunc=0, sf_slope=0, const=0, - fburst=0, tau=0., **extras): - if (sf_trunc <= 0) or (sf_trunc > tage): - Tmax = tage - else: - Tmax = sf_trunc - # Tau models. SFH=1 -> power=1; SFH=4,5 -> power=2 - if ('delay' in self.sfh_type) or ('simha' in self.sfh_type): - power = 2. - else: - power = 1. - mass_tau = tau * gammainc(power, Tmax/tau) - - if 'simha' not in self.sfh_type: - return np.array([mass_tau]) - # SFR at Tmax - sfr_q = (Tmax/tau)**(power-1) * np.exp(-Tmax/tau) - - # linear. 
integral of (1 - m * (T - Tmax)) from Tmax to Tzero - if sf_slope == 0.: - Tz = tage - else: - Tz = Tmax + 1/np.float64(sf_slope) - if (Tz < Tmax) or (Tz > tage) or (not np.isfinite(Tz)): - Tz = tage - m = sf_slope - mass_linear = (Tz - Tmax) - m/2.*(Tz**2 + Tmax**2) + m*Tz*Tmax - - # normalize the linear portion relative to the tau portion - norms = np.array([1, mass_linear * sfr_q / mass_tau]) - norms /= norms.sum() - # now add in constant and burst - if (const > 0) or (fburst > 0): - norms = (1-fburst-const) * norms - norms.tolist().extend([const, fburst]) - return np.array(norms) - - -class LinearSFHBasis(SSPBasis): - """Subclass of SSPBasis that computes SSP weights for piecewise linear SFHs - (i.e. a linearly interpolated tabular SFH). The parameters for this SFH - are: - * `ages` - array of shape (ntab,) giving the lookback time of each - tabulated SFR. If `interp_type` is `"linear"', these are assumed to be - in years. Otherwise they are in log10(years) - * `sfr` - array of shape (ntab,) giving the SFR (in Msun/yr) - * `logzsol` - * `dust2` - """ - def get_galaxy_spectrum(self): - raise(NotImplementedError) - - -def regular_limits(ages, tage=0., sf_trunc=0., mint_log=-3, - interp_type='logarithmic', **extras): - # get the truncation time in units of lookback time - if (sf_trunc <= 0) or (sf_trunc > tage): - tq = 0 - else: - tq = tage - sf_trunc - if interp_type == 'logarithmic': - tq = np.log10(np.max([tq, 10**mint_log])) - tage = np.log10(np.max([tage, 10**mint_log])) - return np.clip(ages, tq, tage) - - -def simha_limits(ages, tage=0., sf_trunc=0, sf_slope=0., mint_log=-3, - interp_type='logarithmic', **extras): - # get the truncation time in units of lookback time - if (sf_trunc <= 0) or (sf_trunc > tage): - tq = 0 - else: - tq = tage - sf_trunc - t0 = tq - 1. / np.float64(sf_slope) - if (t0 > tq) or (t0 <= 0) or (not np.isfinite(t0)): - t0 = 0. 
- if interp_type == 'logarithmic': - tq = np.log10(np.max([tq, 10**mint_log])) - t0 = np.log10(np.max([t0, 10**mint_log])) - return np.clip(ages, t0, tq) - - -def constant_linear(ages, t, **extras): - """Indefinite integral for SFR = 1 - - :param ages: - Linear age(s) of the SSPs. - - :param t: - Linear time at which to evaluate the indefinite integral - """ - return ages * t - t**2 / 2 - - -def constant_logarithmic(logages, logt, **extras): - """SFR = 1 - """ - t = 10**logt - return t * (logages - logt + loge) - - -def tau_linear(ages, t, tau=None, **extras): - """SFR = e^{(tage-t)/\tau} - """ - return (ages - t + tau) * np.exp(t / tau) - - -def tau_logarithmic(logages, logt, tau=None, **extras): - """SFR = e^{(tage-t)/\tau} - """ - tprime = 10**logt / tau - return (logages - logt) * np.exp(tprime) + loge * expi(tprime) - - -def delaytau_linear(ages, t, tau=None, tage=None, **extras): - """SFR = (tage-t) * e^{(tage-t)/\tau} - """ - bracket = tage * ages - (tage + ages)*(t - tau) + t**2 - 2*t*tau + 2*tau**2 - return bracket * np.exp(t / tau) - - -def delaytau_logarithmic(logages, logt, tau=None, tage=None, **extras): - """SFR = (tage-t) * e^{(tage-t)/\tau} - """ - t = 10**logt - tprime = t / tau - a = (t - tage - tau) * (logt - logages) - tau * loge - b = (tage + tau) * loge - return a * np.exp(tprime) + b * expi(tprime) - - -def linear_linear(ages, t, tage=None, sf_trunc=0, sf_slope=0., **extras): - """SFR = [1 - sf_slope * (tage-t)] - """ - tq = np.max([0, tage-sf_trunc]) - k = 1 - sf_slope * tq - return k * ages * t + (sf_slope*ages - k) * t**2 / 2 - sf_slope * t**3 / 3 - - -def linear_logarithmic(logages, logt, tage=None, sf_trunc=0, sf_slope=0., **extras): - """SFR = [1 - sf_slope * (tage-t)] - """ - tq = np.max([0, tage-sf_trunc]) - t = 10**logt - k = 1 - sf_slope * tq - term1 = k * t * (logages - logt + loge) - term2 = sf_slope * t**2 / 2 * (logages - logt + loge / 2) - return term1 + term2 - - -def burst_linear(ages, t, tburst=None, **extras): - """Burst. 
SFR = \delta(t-t_burst) - """ - return ages - tburst - - -def burst_logarithmic(logages, logt, tburst=None, **extras): - """Burst. SFR = \delta(t-t_burst) - """ - return logages - np.log10(tburst) diff --git a/prospect/sources/dust_basis.py b/prospect/sources/dust_basis.py deleted file mode 100644 index 8e478bf7..00000000 --- a/prospect/sources/dust_basis.py +++ /dev/null @@ -1,104 +0,0 @@ -import numpy as np - -try: - from sedpy.observate import getSED -except(ImportError): - pass - -__all__ = ["BlackBodyDustBasis"] - -# cgs constants -from .constants import lsun, pc, kboltz, hplanck -lightspeed = 29979245800.0 - - -class BlackBodyDustBasis(object): - """ - """ - def __init__(self, **kwargs): - self.dust_parlist = ['mass', 'T', 'beta', 'kappa0', 'lambda0'] - self.params = {} - self.params.update(**kwargs) - self.default_wave = np.arange(1000) # in microns - - def get_spectrum(self, outwave=None, filters=None, **params): - """Given a params dictionary, generate spectroscopy, photometry and any - extras (e.g. stellar mass). - - :param outwave: - The output wavelength vector. - - :param filters: - A list of sedpy filter objects. - - :param params: - Keywords forming the parameter set. - - :returns spec: - The restframe spectrum in units of erg/s/cm^2/AA - - :returns phot: - The apparent (redshifted) maggies in each of the filters. - - :returns extras: - A list of None type objects, only included for consistency with the - SedModel class. 
- """ - self.params.update(**params) - if outwave is None: - outwave = self.default_wave - # Loop over number of MBBs - ncomp = len(self.params['mass']) - seds = [self.one_sed(icomp=ic, wave=outwave, filters=filters) - for ic in range(ncomp)] - # sum the components - spec = np.sum([s[0] for s in seds], axis=0) - maggies = np.sum([s[1] for s in seds], axis=0) - extra = [s[2] for s in seds] - - norm = self.normalization() - spec, maggies = norm * spec, norm * maggies - return spec, maggies, extra - - def one_sed(self, icomp=0, wave=None, filters=None, **extras): - """Pull out individual component parameters from the param dictionary - and generate spectra for those components - """ - cpars = {} - for k in self.dust_parlist: - try: - cpars[k] = np.squeeze(self.params[k][icomp]) - except(IndexError, TypeError): - cpars[k] = np.squeeze(self.params[k]) - - spec = cpars['mass'] * modified_BB(wave, **cpars) - phot = 10**(-0.4 * getSED(wave*1e4, spec, filters)) - return spec, phot, None - - def normalization(self): - """This method computes the normalization (due do distance dimming, - unit conversions, etc.) based on the content of the params dictionary. - """ - return 1 - - -def modified_BB(wave, T=20, beta=2.0, kappa0=1.92, lambda0=350, **extras): - """Return a modified blackbody. - - the normalization of the emissivity curve can be given as kappa0 and - lambda0 in units of cm^2/g and microns, default = (1.92, 350). Ouput units - are erg/s/micron/g. 
- """ - term = (lambda0 / wave)**beta - return planck(wave, T=T, **extras) * term * kappa0 - - -def planck(wave, T=20.0, **extras): - """Return planck function B_lambda (erg/s/micron) for a given T (in Kelvin) and - wave (in microns) - """ - # Return B_lambda in erg/s/micron - w = wave * 1e4 #convert from microns to cm - conv = 2 * hplank * lightspeed**2 / w**5 / 1e4 - denom = (np.exp(hplanck * lightspeed / (kboltz * T)) - 1) - return conv / denom diff --git a/prospect/sources/galaxy_basis.py b/prospect/sources/galaxy_basis.py index f767d150..da616086 100644 --- a/prospect/sources/galaxy_basis.py +++ b/prospect/sources/galaxy_basis.py @@ -2,21 +2,243 @@ import numpy as np from copy import deepcopy -from sedpy.smoothing import smoothspec -from .ssp_basis import SSPBasis -from .constants import cosmo, lightspeed, jansky_cgs, to_cgs_at_10pc - try: import fsps - from sedpy.observate import getSED, vac2air, air2vac except(ImportError, RuntimeError): pass -__all__ = ["CSPSpecBasis", "MultiComponentCSPBasis", - "to_cgs"] +__all__ = ["SSPBasis", "FastStepBasis", + "CSPSpecBasis"] + + +class SSPBasis(object): + + """This is a class that wraps the fsps.StellarPopulation object, which is + used for producing SSPs. The ``fsps.StellarPopulation`` object is accessed + as ``SSPBasis().ssp``. + + This class allows for the custom calculation of relative SSP weights (by + overriding ``all_ssp_weights``) to produce spectra from arbitrary composite + SFHs. Alternatively, the entire ``get_galaxy_spectrum`` method can be + overridden to produce a galaxy spectrum in some other way, for example + taking advantage of weight calculations within FSPS for tabular SFHs or for + parameteric SFHs. + + The base implementation here produces an SSP interpolated to the age given + by ``tage``, with initial mass given by ``mass``. However, this is much + slower than letting FSPS calculate the weights, as implemented in + :py:class:`FastSSPBasis`. 
+ + Furthermore, smoothing, redshifting, and filter projections are handled + outside of FSPS, allowing for fast and more flexible algorithms. + + :param reserved_params: + These are parameters which have names like the FSPS parameters but will + not be passed to the StellarPopulation object because we are overriding + their functionality using (hopefully more efficient) custom algorithms. + """ + + def __init__(self, zcontinuous=1, reserved_params=['tage', 'sigma_smooth'], + interp_type='logarithmic', flux_interp='linear', + mint_log=-3, compute_vega_mags=False, + **kwargs): + """ + :param interp_type: (default: "logarithmic") + Specify whether to linearly interpolate the SSPs in log(t) or t. + For the latter, set this to "linear". + + :param flux_interp': (default: "linear") + Whether to compute the final spectrum as \sum_i w_i f_i or + e^{\sum_i w_i ln(f_i)}. Basically you should always do the former, + which is the default. + + :param mint_log: (default: -3) + The log of the age (in years) of the youngest SSP. Note that the + SSP at this age is assumed to have the same spectrum as the minimum + age SSP avalibale from fsps. Typically anything less than 4 or so + is fine for this parameter, since the integral converges as log(t) + -> -inf + + :param reserved_params: + These are parameters which have names like the FSPS parameters but + will not be passed to the StellarPopulation object because we are + overriding their functionality using (hopefully more efficient) + custom algorithms. + """ + + self.interp_type = interp_type + self.mint_log = mint_log + self.flux_interp = flux_interp + self.ssp = fsps.StellarPopulation(compute_vega_mags=compute_vega_mags, + zcontinuous=zcontinuous) + self.ssp.params['sfh'] = 0 + self.reserved_params = reserved_params + self.params = {} + self.update(**kwargs) + + def update(self, **params): + """Update the parameters, passing the *unreserved* FSPS parameters + through to the ``fsps.StellarPopulation`` object. 
+ + :param params: + A parameter dictionary. + """ + for k, v in params.items(): + # try to make parameters scalar + try: + if (len(v) == 1) and callable(v[0]): + self.params[k] = v[0] + else: + self.params[k] = np.squeeze(v) + except: + self.params[k] = v + # Parameters named like FSPS params but that we reserve for use + # here. Do not pass them to FSPS. + if k in self.reserved_params: + continue + # Otherwise if a parameter exists in the FSPS parameter set, pass a + # copy of it in. + if k in self.ssp.params.all_params: + self.ssp.params[k] = deepcopy(v) + + # We use FSPS for SSPs !!ONLY!! + # except for FastStepBasis. And CSPSpecBasis. and... + # assert self.ssp.params['sfh'] == 0 + + def get_galaxy_spectrum(self, **params): + """Update parameters, then get the SSP spectrum + + Returns + ------- + wave : ndarray + Restframe avelength in angstroms. + + spectrum : ndarray + Spectrum in units of Lsun/Hz per solar mass formed. + + mass_fraction : float + Fraction of the formed stellar mass that still exists. + """ + self.update(**params) + wave, spec = self.ssp.get_spectrum(tage=float(self.params['tage']), peraa=False) + return wave, spec, self.ssp.stellar_mass + + def get_galaxy_elines(self): + """Get the wavelengths and specific emission line luminosity of the nebular emission lines + predicted by FSPS. These lines are in units of Lsun/solar mass formed. + This assumes that `get_galaxy_spectrum` has already been called. + + :returns ewave: + The *restframe* wavelengths of the emission lines, AA + + :returns elum: + Specific luminosities of the nebular emission lines, + Lsun/stellar mass formed + """ + ewave = self.ssp.emline_wavelengths + # This allows subclasses to set their own specific emission line + # luminosities within other methods, e.g., get_galaxy_spectrum, by + # populating the `_specific_line_luminosity` attribute. 
+ elum = getattr(self, "_line_specific_luminosity", None) + + if elum is None: + elum = self.ssp.emline_luminosity.copy() + if elum.ndim > 1: + elum = elum[0] + if self.ssp.params["sfh"] == 3: + # tabular sfh + mass = np.sum(self.params.get('mass', 1.0)) + elum /= mass + + return ewave, elum + + @property + def logage(self): + return self.ssp.ssp_ages.copy() + + @property + def wavelengths(self): + return self.ssp.wavelengths.copy() + + +class FastStepBasis(SSPBasis): + """Subclass of :py:class:`SSPBasis` that implements a "nonparameteric" + (i.e. binned) SFH. This is accomplished by generating a tabular SFH with + the proper form to be passed to FSPS. The key parameters for this SFH are: + + * ``agebins`` - array of shape ``(nbin, 2)`` giving the younger and older + (in lookback time) edges of each bin in log10(years) + + * ``mass`` - array of shape ``(nbin,)`` giving the total stellar mass + (in solar masses) **formed** in each bin. + """ -to_cgs = to_cgs_at_10pc + def get_galaxy_spectrum(self, **params): + """Construct the tabular SFH and feed it to the ``ssp``. + """ + self.update(**params) + # --- check to make sure agebins have minimum spacing of 1million yrs --- + # (this can happen in flex models and will crash FSPS) + if np.min(np.diff(10**self.params['agebins'])) < 1e6: + raise ValueError + + mtot = self.params['mass'].sum() + time, sfr, tmax = self.convert_sfh(self.params['agebins'], self.params['mass']) + self.ssp.params["sfh"] = 3 # Hack to avoid rewriting the superclass + self.ssp.set_tabular_sfh(time, sfr) + wave, spec = self.ssp.get_spectrum(tage=tmax, peraa=False) + return wave, spec / mtot, self.ssp.stellar_mass / mtot + + def convert_sfh(self, agebins, mformed, epsilon=1e-4, maxage=None): + """Given arrays of agebins and formed masses with each bin, calculate a + tabular SFH. The resulting time vector has time points either side of + each bin edge with a "closeness" defined by a parameter epsilon. 
+ + :param agebins: + An array of bin edges, log(yrs). This method assumes that the + upper edge of one bin is the same as the lower edge of another bin. + ndarray of shape ``(nbin, 2)`` + + :param mformed: + The stellar mass formed in each bin. ndarray of shape ``(nbin,)`` + + :param epsilon: (optional, default 1e-4) + A small number used to define the fraction time separation of + adjacent points at the bin edges. + + :param maxage: (optional, default: ``None``) + A maximum age of stars in the population, in yrs. If ``None`` then the maximum + value of ``agebins`` is used. Note that an error will occur if maxage + < the maximum age in agebins. + + :returns time: + The output time array for use with sfh=3, in Gyr. ndarray of shape (2*N) + + :returns sfr: + The output sfr array for use with sfh=3, in M_sun/yr. ndarray of shape (2*N) + + :returns maxage: + The maximum valid age in the returned isochrone. + """ + #### create time vector + agebins_yrs = 10**agebins.T + dt = agebins_yrs[1, :] - agebins_yrs[0, :] + bin_edges = np.unique(agebins_yrs) + if maxage is None: + maxage = agebins_yrs.max() # can replace maxage with something else, e.g. tuniv + t = np.concatenate((bin_edges * (1.-epsilon), bin_edges * (1+epsilon))) + t.sort() + t = t[1:-1] # remove older than oldest bin, younger than youngest bin + fsps_time = maxage - t + + #### calculate SFR at each t + sfr = mformed / dt + sfrout = np.zeros_like(t) + sfrout[::2] = sfr + sfrout[1::2] = sfr # * (1+epsilon) + + return (fsps_time / 1e9)[::-1], sfrout[::-1], maxage / 1e9 class CSPSpecBasis(SSPBasis): @@ -121,160 +343,3 @@ def get_galaxy_spectrum(self, **params): return wave, spectrum, mfrac_sum - -class MultiComponentCSPBasis(CSPSpecBasis): - - """Similar to :py:class`CSPSpecBasis`, a class for combinations of N composite stellar - populations (including single-age populations). The number of composite - stellar populations is given by the length of the `mass` parameter. 
- - However, in MultiComponentCSPBasis the SED of the different components are - tracked, and in get_spectrum() photometry can be drawn from a given - component or from the sum. - """ - - def get_galaxy_spectrum(self, **params): - """Update parameters, then loop over each component getting a spectrum - for each. Return all the component spectra, plus the sum. - - :param params: - A parameter dictionary that gets passed to the ``self.update`` - method and will generally include physical parameters that control - the stellar population and output spectrum or SED, some of which - may be vectors for the different componenets - - :returns wave: - Wavelength in angstroms. - - :returns spectrum: - Spectrum in units of Lsun/Hz/solar masses formed. ndarray of - shape(ncomponent+1, nwave). The last element is the sum of the - previous elements. - - :returns mass_fraction: - Fraction of the formed stellar mass that still exists, ndarray of - shape (ncomponent+1,) - """ - self.update(**params) - spectra = [] - mass = np.atleast_1d(self.params['mass']).copy() - mfrac = np.zeros_like(mass) - # Loop over mass components - for i, m in enumerate(mass): - self.update_component(i) - wave, spec = self.ssp.get_spectrum(tage=self.ssp.params['tage'], - peraa=False) - spectra.append(spec) - mfrac[i] = (self.ssp.stellar_mass) - - # Convert normalization units from per stellar mass to per mass formed - if np.all(self.params.get('mass_units', 'mformed') == 'mstar'): - mass /= mfrac - spectrum = np.dot(mass, np.array(spectra)) / mass.sum() - mfrac_sum = np.dot(mass, mfrac) / mass.sum() - - return wave, np.squeeze(spectra + [spectrum]), np.squeeze(mfrac.tolist() + [mfrac_sum]) - - def get_spectrum(self, outwave=None, filters=None, component=-1, **params): - """Get a spectrum and SED for the given params, choosing from different - possible components. - - :param outwave: (default: None) - Desired *vacuum* wavelengths. Defaults to the values in - `sps.wavelength`. 
- - :param peraa: (default: False) - If `True`, return the spectrum in erg/s/cm^2/AA instead of AB - maggies. - - :param filters: (default: None) - A list of filter objects for which you'd like photometry to be - calculated. - - :param component: (optional, default: -1) - An optional array where each element gives the index of the - component from which to choose the magnitude. scalar or iterable - of same length as `filters` - - :param **params: - Optional keywords giving parameter values that will be used to - generate the predicted spectrum. - - :returns spec: - Observed frame component spectra in AB maggies, unless `peraa=True` in which - case the units are erg/s/cm^2/AA. (ncomp+1, nwave) - - :returns phot: - Observed frame photometry in AB maggies, ndarray of shape (ncomp+1, nfilters) - - :returns mass_frac: - The ratio of the surviving stellar mass to the total mass formed. - """ - - # Spectrum in Lsun/Hz per solar mass formed, restframe - wave, spectrum, mfrac = self.get_galaxy_spectrum(**params) - - # Redshifting + Wavelength solution - # We do it ourselves. 
- a = 1 + self.params.get('zred', 0) - af = a - b = 0.0 - - if 'wavecal_coeffs' in self.params: - x = wave - wave.min() - x = 2.0 * (x / x.max()) - 1.0 - c = np.insert(self.params['wavecal_coeffs'], 0, 0) - # assume coeeficients give shifts in km/s - b = chebval(x, c) / (lightspeed*1e-13) - - wa, sa = wave * (a + b), spectrum * af # Observed Frame - if outwave is None: - outwave = wa - - # Observed frame photometry, as absolute maggies - if filters is not None: - # Magic to only do filter projections for unique filters, and get a - # mapping back into this list of unique filters - # note that this may scramble order of unique_filters - fnames = [f.name for f in filters] - unique_names, uinds, filter_ind = np.unique(fnames, return_index=True, return_inverse=True) - unique_filters = np.array(filters)[uinds] - mags = getSED(wa, lightspeed/wa**2 * sa * to_cgs, unique_filters) - phot = np.atleast_1d(10**(-0.4 * mags)) - else: - phot = 0.0 - filter_ind = 0 - - # Distance dimming and unit conversion - zred = self.params.get('zred', 0.0) - if (zred == 0) or ('lumdist' in self.params): - # Use 10pc for the luminosity distance (or a number - # provided in the dist key in units of Mpc) - dfactor = (self.params.get('lumdist', 1e-5) * 1e5)**2 - else: - lumdist = cosmo.luminosity_distance(zred).value - dfactor = (lumdist * 1e5)**2 - - # Spectrum will be in maggies - sa *= to_cgs / dfactor / (3631*jansky_cgs) - - # Convert from absolute maggies to apparent maggies - phot /= dfactor - - # Mass normalization - mass = np.atleast_1d(self.params['mass']) - mass = np.squeeze(mass.tolist() + [mass.sum()]) - - sa = (sa * mass[:, None]) - phot = (phot * mass[:, None])[component, filter_ind] - - return sa, phot, mfrac - - -def gauss(x, mu, A, sigma): - """Lay down mutiple gaussians on the x-axis. 
- """ - mu, A, sigma = np.atleast_2d(mu), np.atleast_2d(A), np.atleast_2d(sigma) - val = (A / (sigma * np.sqrt(np.pi * 2)) * - np.exp(-(x[:, None] - mu)**2 / (2 * sigma**2))) - return val.sum(axis=-1) diff --git a/prospect/sources/ssp_basis.py b/prospect/sources/ssp_basis.py deleted file mode 100644 index c4b17411..00000000 --- a/prospect/sources/ssp_basis.py +++ /dev/null @@ -1,403 +0,0 @@ -from copy import deepcopy -import numpy as np -from numpy.polynomial.chebyshev import chebval - -from sedpy.smoothing import smoothspec -from .constants import cosmo, lightspeed, jansky_cgs, to_cgs_at_10pc - -try: - import fsps - from sedpy.observate import getSED -except(ImportError, RuntimeError): - pass - -__all__ = ["SSPBasis", "FastSSPBasis", "FastStepBasis", - "MultiSSPBasis"] - - -to_cgs = to_cgs_at_10pc - - -class SSPBasis(object): - - """This is a class that wraps the fsps.StellarPopulation object, which is - used for producing SSPs. The ``fsps.StellarPopulation`` object is accessed - as ``SSPBasis().ssp``. - - This class allows for the custom calculation of relative SSP weights (by - overriding ``all_ssp_weights``) to produce spectra from arbitrary composite - SFHs. Alternatively, the entire ``get_galaxy_spectrum`` method can be - overridden to produce a galaxy spectrum in some other way, for example - taking advantage of weight calculations within FSPS for tabular SFHs or for - parameteric SFHs. - - The base implementation here produces an SSP interpolated to the age given - by ``tage``, with initial mass given by ``mass``. However, this is much - slower than letting FSPS calculate the weights, as implemented in - :py:class:`FastSSPBasis`. - - Furthermore, smoothing, redshifting, and filter projections are handled - outside of FSPS, allowing for fast and more flexible algorithms. 
- - :param reserved_params: - These are parameters which have names like the FSPS parameters but will - not be passed to the StellarPopulation object because we are overriding - their functionality using (hopefully more efficient) custom algorithms. - """ - - def __init__(self, zcontinuous=1, reserved_params=['tage', 'sigma_smooth'], - interp_type='logarithmic', flux_interp='linear', - mint_log=-3, compute_vega_mags=False, - **kwargs): - """ - :param interp_type: (default: "logarithmic") - Specify whether to linearly interpolate the SSPs in log(t) or t. - For the latter, set this to "linear". - - :param flux_interp': (default: "linear") - Whether to compute the final spectrum as \sum_i w_i f_i or - e^{\sum_i w_i ln(f_i)}. Basically you should always do the former, - which is the default. - - :param mint_log: (default: -3) - The log of the age (in years) of the youngest SSP. Note that the - SSP at this age is assumed to have the same spectrum as the minimum - age SSP avalibale from fsps. Typically anything less than 4 or so - is fine for this parameter, since the integral converges as log(t) - -> -inf - - :param reserved_params: - These are parameters which have names like the FSPS parameters but - will not be passed to the StellarPopulation object because we are - overriding their functionality using (hopefully more efficient) - custom algorithms. - """ - - self.interp_type = interp_type - self.mint_log = mint_log - self.flux_interp = flux_interp - self.ssp = fsps.StellarPopulation(compute_vega_mags=compute_vega_mags, - zcontinuous=zcontinuous) - self.ssp.params['sfh'] = 0 - self.reserved_params = reserved_params - self.params = {} - self.update(**kwargs) - - def update(self, **params): - """Update the parameters, passing the *unreserved* FSPS parameters - through to the ``fsps.StellarPopulation`` object. - - :param params: - A parameter dictionary. 
- """ - for k, v in params.items(): - # try to make parameters scalar - try: - if (len(v) == 1) and callable(v[0]): - self.params[k] = v[0] - else: - self.params[k] = np.squeeze(v) - except: - self.params[k] = v - # Parameters named like FSPS params but that we reserve for use - # here. Do not pass them to FSPS. - if k in self.reserved_params: - continue - # Otherwise if a parameter exists in the FSPS parameter set, pass a - # copy of it in. - if k in self.ssp.params.all_params: - self.ssp.params[k] = deepcopy(v) - - # We use FSPS for SSPs !!ONLY!! - # except for FastStepBasis. And CSPSpecBasis. and... - # assert self.ssp.params['sfh'] == 0 - - def get_galaxy_spectrum(self, **params): - """Update parameters, then multiply SSP weights by SSP spectra and - stellar masses, and sum. - - :returns wave: - Wavelength in angstroms. - - :returns spectrum: - Spectrum in units of Lsun/Hz/solar masses formed. - - :returns mass_fraction: - Fraction of the formed stellar mass that still exists. - """ - self.update(**params) - - # Get the SSP spectra and masses (caching the latter), adding an extra - # mass and spectrum for t=0, using the first SSP spectrum. - wave, ssp_spectra = self.ssp.get_spectrum(tage=0, peraa=False) - ssp_spectra = np.vstack([ssp_spectra[0, :], ssp_spectra]) - self.ssp_stellar_masses = np.insert(self.ssp.stellar_mass, 0, 1.0) - if self.flux_interp == 'logarithmic': - ssp_spectra = np.log(ssp_spectra) - - # Get weighted sum of spectra, adding the t=0 spectrum using the first SSP. - weights = self.all_ssp_weights - spectrum = np.dot(weights, ssp_spectra) / weights.sum() - if self.flux_interp == 'logarithmic': - spectrum = np.exp(spectrum) - - # Get the weighted stellar_mass/mformed ratio - mass_frac = (self.ssp_stellar_masses * weights).sum() / weights.sum() - return wave, spectrum, mass_frac - - def get_galaxy_elines(self): - """Get the wavelengths and specific emission line luminosity of the nebular emission lines - predicted by FSPS. 
These lines are in units of Lsun/solar mass formed. - This assumes that `get_galaxy_spectrum` has already been called. - - :returns ewave: - The *restframe* wavelengths of the emission lines, AA - - :returns elum: - Specific luminosities of the nebular emission lines, - Lsun/stellar mass formed - """ - ewave = self.ssp.emline_wavelengths - # This allows subclasses to set their own specific emission line - # luminosities within other methods, e.g., get_galaxy_spectrum, by - # populating the `_specific_line_luminosity` attribute. - elum = getattr(self, "_line_specific_luminosity", None) - - if elum is None: - elum = self.ssp.emline_luminosity.copy() - if elum.ndim > 1: - elum = elum[0] - if self.ssp.params["sfh"] == 3: - # tabular sfh - mass = np.sum(self.params.get('mass', 1.0)) - elum /= mass - - return ewave, elum - - def get_spectrum(self, outwave=None, filters=None, peraa=False, **params): - """Get a spectrum and SED for the given params. - - :param outwave: (default: None) - Desired *vacuum* wavelengths. Defaults to the values in - ``sps.wavelength``. - - :param peraa: (default: False) - If `True`, return the spectrum in erg/s/cm^2/AA instead of AB - maggies. - - :param filters: (default: None) - A list of filter objects for which you'd like photometry to be calculated. - - :param params: - Optional keywords giving parameter values that will be used to - generate the predicted spectrum. - - :returns spec: - Observed frame spectrum in AB maggies, unless ``peraa=True`` in which - case the units are erg/s/cm^2/AA. - - :returns phot: - Observed frame photometry in AB maggies. - - :returns mass_frac: - The ratio of the surviving stellar mass to the total mass formed. - """ - # Spectrum in Lsun/Hz per solar mass formed, restframe - wave, spectrum, mfrac = self.get_galaxy_spectrum(**params) - - # Redshifting + Wavelength solution - # We do it ourselves. 
- a = 1 + self.params.get('zred', 0) - af = a - b = 0.0 - - if 'wavecal_coeffs' in self.params: - x = wave - wave.min() - x = 2.0 * (x / x.max()) - 1.0 - c = np.insert(self.params['wavecal_coeffs'], 0, 0) - # assume coeeficients give shifts in km/s - b = chebval(x, c) / (lightspeed*1e-13) - - wa, sa = wave * (a + b), spectrum * af # Observed Frame - if outwave is None: - outwave = wa - - # Observed frame photometry, as absolute maggies - if filters is not None: - flambda = lightspeed/wa**2 * sa * to_cgs - phot = 10**(-0.4 * np.atleast_1d(getSED(wa, flambda, filters))) - # TODO: below is faster for sedpy > 0.2.0 - #phot = np.atleast_1d(getSED(wa, lightspeed/wa**2 * sa * to_cgs, - # filters, linear_flux=True)) - else: - phot = 0.0 - - # Spectral smoothing. - do_smooth = (('sigma_smooth' in self.params) and - ('sigma_smooth' in self.reserved_params)) - if do_smooth: - # We do it ourselves. - smspec = self.smoothspec(wa, sa, self.params['sigma_smooth'], - outwave=outwave, **self.params) - elif outwave is not wa: - # Just interpolate - smspec = np.interp(outwave, wa, sa, left=0, right=0) - else: - # no interpolation necessary - smspec = sa - - # Distance dimming and unit conversion - zred = self.params.get('zred', 0.0) - if (zred == 0) or ('lumdist' in self.params): - # Use 10pc for the luminosity distance (or a number - # provided in the dist key in units of Mpc) - dfactor = (self.params.get('lumdist', 1e-5) * 1e5)**2 - else: - lumdist = cosmo.luminosity_distance(zred).value - dfactor = (lumdist * 1e5)**2 - if peraa: - # spectrum will be in erg/s/cm^2/AA - smspec *= to_cgs / dfactor * lightspeed / outwave**2 - else: - # Spectrum will be in maggies - smspec *= to_cgs / dfactor / (3631*jansky_cgs) - - # Convert from absolute maggies to apparent maggies - phot /= dfactor - - # Mass normalization - mass = np.sum(self.params.get('mass', 1.0)) - if np.all(self.params.get('mass_units', 'mformed') == 'mstar'): - # Convert input normalization units from current stellar mass to 
mass formed - mass /= mfrac - - return smspec * mass, phot * mass, mfrac - - @property - def all_ssp_weights(self): - """Weights for a single age population. This is a slow way to do this! - """ - if self.interp_type == 'linear': - sspages = np.insert(10**self.logage, 0, 0) - tb = self.params['tage'] * 1e9 - - elif self.interp_type == 'logarithmic': - sspages = np.insert(self.logage, 0, self.mint_log) - tb = np.log10(self.params['tage']) + 9 - - ind = np.searchsorted(sspages, tb) # index of the higher bracketing lookback time - dt = (sspages[ind] - sspages[ind - 1]) - ww = np.zeros(len(sspages)) - ww[ind - 1] = (sspages[ind] - tb) / dt - ww[ind] = (tb - sspages[ind-1]) / dt - return ww - - def smoothspec(self, wave, spec, sigma, outwave=None, **kwargs): - outspec = smoothspec(wave, spec, sigma, outwave=outwave, **kwargs) - return outspec - - @property - def logage(self): - return self.ssp.ssp_ages.copy() - - @property - def wavelengths(self): - return self.ssp.wavelengths.copy() - - -class FastSSPBasis(SSPBasis): - """A subclass of :py:class:`SSPBasis` that is a faster way to do SSP models by letting - FSPS do the weight calculations. - """ - - def get_galaxy_spectrum(self, **params): - self.update(**params) - wave, spec = self.ssp.get_spectrum(tage=float(self.params['tage']), peraa=False) - return wave, spec, self.ssp.stellar_mass - - -class FastStepBasis(SSPBasis): - """Subclass of :py:class:`SSPBasis` that implements a "nonparameteric" - (i.e. binned) SFH. This is accomplished by generating a tabular SFH with - the proper form to be passed to FSPS. The key parameters for this SFH are: - - * ``agebins`` - array of shape ``(nbin, 2)`` giving the younger and older - (in lookback time) edges of each bin in log10(years) - - * ``mass`` - array of shape ``(nbin,)`` giving the total stellar mass - (in solar masses) **formed** in each bin. - """ - - def get_galaxy_spectrum(self, **params): - """Construct the tabular SFH and feed it to the ``ssp``. 
- """ - self.update(**params) - # --- check to make sure agebins have minimum spacing of 1million yrs --- - # (this can happen in flex models and will crash FSPS) - if np.min(np.diff(10**self.params['agebins'])) < 1e6: - raise ValueError - - mtot = self.params['mass'].sum() - time, sfr, tmax = self.convert_sfh(self.params['agebins'], self.params['mass']) - self.ssp.params["sfh"] = 3 # Hack to avoid rewriting the superclass - self.ssp.set_tabular_sfh(time, sfr) - wave, spec = self.ssp.get_spectrum(tage=tmax, peraa=False) - return wave, spec / mtot, self.ssp.stellar_mass / mtot - - def convert_sfh(self, agebins, mformed, epsilon=1e-4, maxage=None): - """Given arrays of agebins and formed masses with each bin, calculate a - tabular SFH. The resulting time vector has time points either side of - each bin edge with a "closeness" defined by a parameter epsilon. - - :param agebins: - An array of bin edges, log(yrs). This method assumes that the - upper edge of one bin is the same as the lower edge of another bin. - ndarray of shape ``(nbin, 2)`` - - :param mformed: - The stellar mass formed in each bin. ndarray of shape ``(nbin,)`` - - :param epsilon: (optional, default 1e-4) - A small number used to define the fraction time separation of - adjacent points at the bin edges. - - :param maxage: (optional, default: ``None``) - A maximum age of stars in the population, in yrs. If ``None`` then the maximum - value of ``agebins`` is used. Note that an error will occur if maxage - < the maximum age in agebins. - - :returns time: - The output time array for use with sfh=3, in Gyr. ndarray of shape (2*N) - - :returns sfr: - The output sfr array for use with sfh=3, in M_sun/yr. ndarray of shape (2*N) - - :returns maxage: - The maximum valid age in the returned isochrone. 
- """ - #### create time vector - agebins_yrs = 10**agebins.T - dt = agebins_yrs[1, :] - agebins_yrs[0, :] - bin_edges = np.unique(agebins_yrs) - if maxage is None: - maxage = agebins_yrs.max() # can replace maxage with something else, e.g. tuniv - t = np.concatenate((bin_edges * (1.-epsilon), bin_edges * (1+epsilon))) - t.sort() - t = t[1:-1] # remove older than oldest bin, younger than youngest bin - fsps_time = maxage - t - - #### calculate SFR at each t - sfr = mformed / dt - sfrout = np.zeros_like(t) - sfrout[::2] = sfr - sfrout[1::2] = sfr # * (1+epsilon) - - return (fsps_time / 1e9)[::-1], sfrout[::-1], maxage / 1e9 - - -class MultiSSPBasis(SSPBasis): - """An array of basis spectra with different ages, metallicities, and possibly dust - attenuations. - """ - def get_galaxy_spectrum(self): - raise(NotImplementedError) From 81ac17b9ae21d2d6d31026135705633d8d5d81f5 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 29 Nov 2022 13:00:28 +0000 Subject: [PATCH 14/33] replace LineSpec with a method of SpecModel; rename get_el() to the more descriptive fit_mle_elines() --- prospect/data/observation.py | 2 +- prospect/models/sedmodel.py | 168 +++++++++++------------------------ 2 files changed, 53 insertions(+), 117 deletions(-) diff --git a/prospect/data/observation.py b/prospect/data/observation.py index f82ce886..95ae7844 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -316,7 +316,7 @@ def to_oldstyle(self): class Lines(Spectrum): - kind = "spectrum" + kind = "lines" alias = dict(spectrum="flux", unc="uncertainty", wavelength="wavelength", diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 4124449f..de69f253 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -125,6 +125,8 @@ def predict(self, theta, observations=None, sps=None, **extras): def predict_obs(self, obs): if obs.kind == "spectrum": prediction = self.predict_spec(obs) + elif obs.kind == "lines": + prediction = 
self.predict_lines(obs) elif obs.kind == "photometry": prediction = self.predict_phot(obs.filterset) return prediction @@ -152,7 +154,7 @@ def predict_spec(self, obs, **extras): spectroscopic calibration factor included. Numerous quantities related to the emission lines are also cached (see - ``cache_eline_parameters()`` and ``fit_el()`` for details.) + ``cache_eline_parameters()`` and ``fit_mle_elines()`` for details.) :param obs: An instance of `Spectrum`, containing the output wavelength array, @@ -208,7 +210,7 @@ def predict_spec(self, obs, **extras): # FIXME: do this only if the noise model is non-trivial, and make sure masking is consistent #vectors = obs.noise.populate_vectors(obs) #sigma_spec = obs.noise.construct_covariance(**vectors) - self._fit_eline_spec = self.get_el(obs, calibrated_spec, sigma_spec) + self._fit_eline_spec = self.fit_mle_elines(obs, calibrated_spec, sigma_spec) calibrated_spec[emask] += self._fit_eline_spec.sum(axis=1) # --- cache intrinsic spectrum --- @@ -216,6 +218,52 @@ def predict_spec(self, obs, **extras): return calibrated_spec + def predict_lines(self, obs, **extras): + """Generate a prediction for the observed nebular line fluxes. This method assumes + that the model parameters have been set and that the following + attributes are present and correct + + ``_wave`` - The SPS restframe wavelength array + + ``_zred`` - Redshift + + ``_norm_spec`` - Observed frame spectral fluxes, in units of maggies + + ``_eline_wave`` and ``_eline_lum`` - emission line parameters from the SPS model + It generates the following attributes + + ``_outwave`` - Wavelength grid (observed frame) + + ``_speccal`` - Calibration vector + + Numerous quantities related to the emission lines are also cached (see + ``cache_eline_parameters()`` and ``fit_mle_elines()`` for details) including + ``_predicted_line_inds`` which is the indices of the line that are predicted. 
+ + :param obs: + An observation dictionary, containing the keys + + ``"wavelength"`` - the observed frame wavelength of the lines. + + ``"line_ind"`` - a set of indices identifying the observed lines in + the fsps line array + Assumed to be the result of :py:meth:`utils.obsutils.rectify_obs` + + :returns spec: + The prediction for the observed frame nebular emission line flux these + parameters, at the wavelengths specified by ``obs['wavelength']``, + ndarray of shape ``(nwave,)`` in units of erg/s/cm^2. + """ + obs_wave = self.observed_wave(self._eline_wave, do_wavecal=False) + self._outwave = obs.get('wavelength', obs_wave) + assert len(self._outwave) <= len(self.emline_info) + + # --- cache eline parameters --- + self.cache_eline_parameters(obs) + + # find the indices of the observed emission lines + #dw = np.abs(self._ewave_obs[:, None] - self._outwave[None, :]) + #self._predicted_line_inds = np.argmin(dw, axis=0) + self._predicted_line_inds = obs.get("line_ind") + self._speccal = 1.0 + + self.line_norm = self.flux_norm() / (1 + self._zred) * (3631*jansky_cgs) + elums = self._eline_lum[self._predicted_line_inds] * self.line_norm + + return elums + def predict_phot(self, filterset): """Generate a prediction for the observed photometry. This method assumes that the parameters have been set and that the following attributes are @@ -430,7 +478,7 @@ def parse_elines(self): self._use_eline = ~np.isin(self.emline_info["name"], self.params["elines_to_ignore"]) - def fit_el(self, obs, calibrated_spec, sigma_spec=None): + def fit_mle_elines(self, obs, calibrated_spec, sigma_spec=None): """Compute the maximum likelihood and, optionally, MAP emission line amplitudes for lines that fall within the observed spectral range. 
Also compute and cache the analytic penalty to log-likelihood from @@ -794,118 +842,6 @@ def obs_to_mask(self, obs): return mask, wrange -class LineSpecModel(SpecModel): - - """This is a sublcass of SpecModel that predicts emission line fluxes - instead of a full spectrum, useful when the continuum is not detected or is - otherwise uninformative. - """ - - def _available_parameters(self): - pars = [("linespec_scaling", "This float scales the predicted nebular " - "emission line luminosities, for example to accxount for a " - "(constant in wavelengtrh) slit loss"), - ] - - return pars - - def predict_spec(self, obs, **extras): - """Generate a prediction for the observed nebular line fluxes. This method assumes - that the model parameters have been set and that the following - attributes are present and correct - + ``_wave`` - The SPS restframe wavelength array - + ``_zred`` - Redshift - + ``_norm_spec`` - Observed frame spectral fluxes, in units of maggies - + ``_eline_wave`` and ``_eline_lum`` - emission line parameters from the SPS model - It generates the following attributes - + ``_outwave`` - Wavelength grid (observed frame) - + ``_speccal`` - Calibration vector - - Numerous quantities related to the emission lines are also cached (see - ``cache_eline_parameters()`` and ``fit_el()`` for details) including - ``_predicted_line_inds`` which is the indices of the line that are predicted. - - :param obs: - An observation dictionary, containing the keys - + ``"wavelength"`` - the observed frame wavelength of the lines. - + ``"line_ind"`` - a set of indices identifying the observed lines in - the fsps line array - Assumed to be the result of :py:meth:`utils.obsutils.rectify_obs` - - :returns spec: - The prediction for the observed frame nebular emission line flux these - parameters, at the wavelengths specified by ``obs['wavelength']``, - ndarray of shape ``(nwave,)`` in units of erg/s/cm^2. 
- """ - obs_wave = self.observed_wave(self._eline_wave, do_wavecal=False) - self._outwave = obs.get('wavelength', obs_wave) - assert len(self._outwave) <= len(self.emline_info) - - # --- cache eline parameters --- - self.cache_eline_parameters(obs) - - # find the indices of the observed emission lines - #dw = np.abs(self._ewave_obs[:, None] - self._outwave[None, :]) - #self._predicted_line_inds = np.argmin(dw, axis=0) - self._predicted_line_inds = obs.get("line_ind") - self._speccal = 1.0 - - self.line_norm = self.flux_norm() / (1 + self._zred) * (3631*jansky_cgs) - self.line_norm *= self.params.get("linespec_scaling", 1.0) - elums = self._eline_lum[self._predicted_line_inds] * self.line_norm - - return elums - - def nebline_photometry(self, filters, elams=None, elums=None): - """Compute the emission line contribution to photometry. This requires - several cached attributes: - + ``_ewave_obs`` - + ``_eline_lum`` - - :param filters: - Instance of :py:class:`sedpy.observate.FilterSet` or list of - :py:class:`sedpy.observate.Filter` objects - - :param elams: (optional) - The emission line wavelength in angstroms. If not supplied uses the - cached ``_ewave_obs`` attribute. - - :param elums: (optional) - The emission line flux in erg/s/cm^2. If not supplied uses the - cached ``_eline_lum`` attribute and applies appropriate distance - dimming and unit conversion. - - :returns nebflux: - The flux of the emission line through the filters, in units of - maggies. 
ndarray of shape ``(len(filters),)`` - """ - if (elams is None) or (elums is None): - elams = self._ewave_obs[self._use_eline] - # We have to remove the extra (1+z) since this is flux, not a flux density - # Also we convert to cgs - self.line_norm = self.flux_norm() / (1 + self._zred) * (3631*jansky_cgs) - elums = self._eline_lum[self._use_eline] * self.line_norm - - # loop over filters - flux = np.zeros(len(filters)) - try: - # TODO: Since in this case filters are on a grid, there should be a - # faster way to look up the transmission than the later loop - flist = filters.filters - except(AttributeError): - flist = filters - for i, filt in enumerate(flist): - # calculate transmission at line wavelengths - trans = np.interp(elams, filt.wavelength, filt.transmission, - left=0., right=0.) - # include all lines where transmission is non-zero - idx = (trans > 0) - if True in idx: - flux[i] = (trans[idx]*elams[idx]*elums[idx]).sum() / filt.ab_zero_counts - - return flux - - class AGNSpecModel(SpecModel): def __init__(self, *args, **kwargs): @@ -963,7 +899,7 @@ def predict_spec(self, obs, sigma_spec=None, **extras): spectroscopic calibration factor included. Numerous quantities related to the emission lines are also cached (see - ``cache_eline_parameters()`` and ``fit_el()`` for details.) + ``cache_eline_parameters()`` and ``fit_mle_elines()`` for details.) :param obs: An observation dictionary, containing the output wavelength array, From 8e9935df2a7357530a5e66852c3dedd702492a96 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 29 Nov 2022 13:14:09 +0000 Subject: [PATCH 15/33] Docstring updates, make env name more flexible in install instructions. 
--- conda_install.sh | 1 - prospect/models/sedmodel.py | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/conda_install.sh b/conda_install.sh index 22bd4323..3b9e5f07 100644 --- a/conda_install.sh +++ b/conda_install.sh @@ -9,7 +9,6 @@ cd $CODEDIR git clone https://github.com/cconroy20/fsps.git export SPS_HOME="$PWD/fsps" -# Create and activate environment (named 'prospector') git clone https://github.com/bd-j/prospector.git cd prospector conda env create -f environment.yml -n prospector diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index de69f253..243557db 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -10,7 +10,6 @@ from numpy.polynomial.chebyshev import chebval, chebvander from scipy.interpolate import splrep, BSpline -from scipy.stats import multivariate_normal as mvn from sedpy.observate import getSED from sedpy.smoothing import smoothspec @@ -20,8 +19,9 @@ from ..sources.constants import cosmo, lightspeed, ckms, jansky_cgs -__all__ = ["SpecModel", "PolySpecModel", "SplineSpecModel", - "LineSpecModel", "AGNSpecModel", +__all__ = ["SpecModel", + "PolySpecModel", "SplineSpecModel", + "AGNSpecModel", "PolyFitModel"] @@ -220,28 +220,32 @@ def predict_spec(self, obs, **extras): def predict_lines(self, obs, **extras): """Generate a prediction for the observed nebular line fluxes. 
This method assumes - that the model parameters have been set and that the following - attributes are present and correct + that the model parameters have been set, that any adjustments to the + emission line fluxes based on ML fitting have been applied, and that the + following attributes are present and correct + ``_wave`` - The SPS restframe wavelength array + ``_zred`` - Redshift - + ``_norm_spec`` - Observed frame spectral fluxes, in units of maggies + ``_eline_wave`` and ``_eline_lum`` - emission line parameters from the SPS model It generates the following attributes + ``_outwave`` - Wavelength grid (observed frame) + ``_speccal`` - Calibration vector + + ``line_norm`` - the conversion from FSPS line luminosities to the + observed line luminosities, including scaling fudge_factor + + ``_predicted_line_inds`` - the indices of the line that are predicted Numerous quantities related to the emission lines are also cached (see ``cache_eline_parameters()`` and ``fit_mle_elines()`` for details) including ``_predicted_line_inds`` which is the indices of the line that are predicted. + ``cache_eline_parameters()`` and ``fit_elines()`` for details). + :param obs: - An observation dictionary, containing the keys + A ``data.observation.Lines()`` instance, with the attributes + ``"wavelength"`` - the observed frame wavelength of the lines. + ``"line_ind"`` - a set of indices identifying the observed lines in the fsps line array - Assumed to be the result of :py:meth:`utils.obsutils.rectify_obs` - :returns spec: + :returns elum: The prediction for the observed frame nebular emission line flux these parameters, at the wavelengths specified by ``obs['wavelength']``, ndarray of shape ``(nwave,)`` in units of erg/s/cm^2. 
@@ -260,6 +264,7 @@ def predict_lines(self, obs, **extras): self._speccal = 1.0 self.line_norm = self.flux_norm() / (1 + self._zred) * (3631*jansky_cgs) + self.line_norm *= self.params.get("linespec_scaling", 1.0) elums = self._eline_lum[self._predicted_line_inds] * self.line_norm return elums From 3a1f234bde534e742d61205cf3e71e843f6f2849 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Wed, 30 Nov 2022 01:41:42 +0000 Subject: [PATCH 16/33] Fix imports; update and test AGNSpecModel predictions. --- prospect/data/observation.py | 20 +++++++++++++------- prospect/models/__init__.py | 2 +- prospect/models/sedmodel.py | 28 ++++++++++++++++++++++++++-- tests/test_agn_eline.py | 29 +++++++++++++---------------- 4 files changed, 53 insertions(+), 26 deletions(-) diff --git a/prospect/data/observation.py b/prospect/data/observation.py index 95ae7844..963cbe40 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -221,9 +221,13 @@ def wavelength(self): return np.array([f.wave_effective for f in self.filters]) def to_oldstyle(self): - obs = vars(self) - obs.update({k: self[v] for k, v in self.alias.items()}) - _ = [obs.pop(k) for k in ["flux", "uncertainty", "mask"]] + obs = {} + obs.update(vars(self)) + for k, v in self.alias.items(): + obs[k] = self[v] + _ = obs.pop(v) + #obs.update({k: self[v] for k, v in self.alias.items()}) + #_ = [obs.pop(k) for k in ["flux", "uncertainty", "mask"]] obs["phot_wave"] = self.wavelength return obs @@ -285,7 +289,7 @@ def instrumental_smoothing(self, obswave, influx, libres=0): Flux array libres : float or ndarray - Library resolution in units of km/ (dispersion) to be subtracted from the smoothing kernel. + Library resolution in units of km/s (dispersion) to be subtracted from the smoothing kernel. 
Returns ------- @@ -308,9 +312,11 @@ def instrumental_smoothing(self, obswave, influx, libres=0): return out def to_oldstyle(self): - obs = vars(self) - obs.update({k: self[v] for k, v in self.alias.items()}) - _ = [obs.pop(k) for k in ["flux", "uncertainty"]] + obs = {} + obs.update(vars(self)) + for k, v in self.alias.items(): + obs[k] = self[v] + _ = obs.pop(v) return obs diff --git a/prospect/models/__init__.py b/prospect/models/__init__.py index c263ee34..1fe98241 100644 --- a/prospect/models/__init__.py +++ b/prospect/models/__init__.py @@ -8,7 +8,7 @@ from .sedmodel import ProspectorParams, SpecModel from .sedmodel import PolySpecModel, SplineSpecModel -from .sedmodel import AGNSpecModel, LineSpecModel +from .sedmodel import AGNSpecModel __all__ = ["ProspectorParams", diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 243557db..3d5d8871 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -881,7 +881,7 @@ def init_aline_info(self): assert np.abs(self.emline_info["wave"][59] - 4863) < 2 self._aline_lum[ainds] = afluxes - def predict_spec(self, obs, sigma_spec=None, **extras): + def predict_spec(self, obs, **extras): """Generate a prediction for the observed spectrum. 
This method assumes that the parameters have been set and that the following attributes are present and correct @@ -933,7 +933,7 @@ def predict_spec(self, obs, sigma_spec=None, **extras): self.cache_eline_parameters(obs, nsigma=nsigma) # --- smooth and put on output wavelength grid --- - smooth_spec = self.smoothspec(obs_wave, self._norm_spec) + smooth_spec = self.velocity_smoothing(obs_wave, self._norm_spec) # --- add fixed lines --- assert self.params["nebemlineinspec"] == False, "must add agn and nebular lines within prospector" @@ -962,6 +962,30 @@ def predict_spec(self, obs, sigma_spec=None, **extras): return calibrated_spec + def predict_lines(self, obs, **extras): + """Generate a prediction for the observed nebular line fluxes, including + AGN. + + :param obs: + A ``data.observation.Lines()`` instance, with the attributes + + ``"wavelength"`` - the observed frame wavelength of the lines. + + ``"line_ind"`` - a set of indices identifying the observed lines in + the fsps line array + + :returns elum: + The prediction for the observed frame nebular + AGN emission line + flux these parameters, at the wavelengths specified by + ``obs['wavelength']``, ndarray of shape ``(nwave,)`` in units of + erg/s/cm^2. + """ + sflums = super().predict_lines(obs, **extras) + anorm = self.params.get('agn_elum', 1.0) * self.line_norm + alums = self._aline_lum[self._predicted_line_inds] * anorm + + elums = sflums + alums + + return elums + def predict_phot(self, filters): """Generate a prediction for the observed photometry. 
This method assumes that the parameters have been set and that the following attributes are diff --git a/tests/test_agn_eline.py b/tests/test_agn_eline.py index 3fca249d..84f8afe7 100644 --- a/tests/test_agn_eline.py +++ b/tests/test_agn_eline.py @@ -5,7 +5,7 @@ import numpy as np from sedpy import observate -from prospect.utils.obsutils import fix_obs +from prospect.data.observation import Spectrum, Photometry from prospect.models.sedmodel import AGNSpecModel from prospect.models.templates import TemplateLibrary from prospect.sources import CSPSpecBasis @@ -19,11 +19,13 @@ def test_agn_elines(): fnames = [f"sdss_{b}0" for b in "ugriz"] filts = observate.load_filters(fnames) - obs = dict(filters=filts, - wavelength=np.linspace(3000, 9000, 1000), - spectrum=np.ones(1000), - unc=np.ones(1000)*0.1) - obs = fix_obs(obs) + phot = Photometry(filters=filts, + flux=np.ones(len(filts)), + uncertainty=0.1 * np.ones(len(filts))) + spec = Spectrum(wavelength=np.linspace(3000, 9000, 1000), + flux=np.ones(1000), + uncertainty=np.ones(1000)*0.1) + obs = [spec, phot] # --- model --- model_pars = TemplateLibrary["parametric_sfh"] @@ -36,26 +38,21 @@ def test_agn_elines(): model = AGNSpecModel(model_pars) model.params["agn_elum"] = 1e-4 - spec0, phot0, x0 = model.predict(model.theta, obs, sps) + (spec0, phot0), x0 = model.predict(model.theta, obs, sps) model.params["agn_elum"] = 1e-6 - spec1, phot1, x1 = model.predict(model.theta, obs, sps) + (spec1, phot1), x1 = model.predict(model.theta, obs, sps) assert (not np.allclose(spec1, spec0)), "changing AGN luminosity had no effect" model.params["agn_elum"] = 1e-4 model.params["agn_eline_sigma"] = 400.0 - spec2, phot2, x2 = model.predict(model.theta, obs, sps) + (spec2, phot2), x2 = model.predict(model.theta, obs, sps) assert (not np.allclose(spec2, spec0)), "broadening lines had no effect on the spectrum" assert np.allclose(phot2, phot0), "broadening lines changed the photometry" # do a check for phot-only obs - pobs = 
dict(filters=filts, - maggies=np.ones(len(filts)), - maggies_unc=0.1 * np.ones(len(filts)), - wavelength=np.linspace(3000, 9000, 1000), - spectrum=None) - pobs = fix_obs(pobs) - spec3, phot3, x2 = model.predict(model.theta, obs=pobs, sps=sps) + pobs = [phot] + (phot3), x2 = model.predict(model.theta, observations=pobs, sps=sps) assert np.allclose(phot3, phot2), "Phot-only obs did not add AGn lines correctly" if False: From 6c25bb13f2e1723845077c54073ccebad42f658c Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sun, 15 Jan 2023 21:40:53 -0500 Subject: [PATCH 17/33] fixes for run_dynesty; better observation data checking; some work with noise modeling. --- prospect/data/observation.py | 2 ++ prospect/fitting/fitting.py | 7 +++---- prospect/likelihood/noise_model.py | 25 +++++++++---------------- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/prospect/data/observation.py b/prospect/data/observation.py index 963cbe40..1361bd66 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -86,6 +86,8 @@ def rectify(self): return assert self.wavelength.ndim == 1, "`wavelength` is not 1-d array" + assert self.flux.ndim == 1, "flux is not a 1d array" + assert self.uncertainty.ndim == 1, "uncertainty is not a 1d array" assert self.ndata > 0, "no wavelength points supplied!" assert self.uncertainty is not None, "No uncertainties." assert len(self.wavelength) == len(self.flux), "Flux array not same shape as wavelength." diff --git a/prospect/fitting/fitting.py b/prospect/fitting/fitting.py index 4afa53bd..7c6fdea9 100755 --- a/prospect/fitting/fitting.py +++ b/prospect/fitting/fitting.py @@ -100,7 +100,7 @@ def lnprobfn(theta, model=None, observations=None, sps=None, # --- Optionally return chi vectors for least-squares --- # note this does not include priors! 
if residuals: - chi = [compute_chi(spec, obs) for pred, obs in zip(predictions, observations)] + chi = [compute_chi(pred, obs) for pred, obs in zip(predictions, observations)] return np.concatenate(chi) # --- Emission Lines --- @@ -411,7 +411,7 @@ def run_emcee(observations, model, sps, lnprobfn=lnprobfn, return sampler, ts -def run_dynesty(obs, model, sps, noise, lnprobfn=lnprobfn, +def run_dynesty(observations, model, sps, lnprobfn=lnprobfn, pool=None, nested_target_n_effective=10000, **kwargs): """Thin wrapper on :py:class:`prospect.fitting.nested.run_dynesty_sampler` @@ -461,8 +461,7 @@ def run_dynesty(obs, model, sps, noise, lnprobfn=lnprobfn, from dynesty.dynamicsampler import stopping_function, weight_function nested_stop_kwargs = {"target_n_effective": nested_target_n_effective} - lnp = wrap_lnp(lnprobfn, observations, model, sps, noise=noise, - nested=True) + lnp = wrap_lnp(lnprobfn, observations, model, sps, nested=True) # Need to deal with postkwargs... diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index 5683cb21..9da98abd 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -12,8 +12,8 @@ class NoiseModel: - """This class allows for 1-d covariance matrix noise models without any - special kernels for covariance matrix construction. + """This base class allows for 1-d noise models without any special kernels + for covariance matrix construction, but with possibility for outliers. """ f_outlier = 0 @@ -31,8 +31,9 @@ def update(self, **params): def lnlike(self, pred, obs, vectors={}): - # Construct Sigma (and factorize if 2d) + # populatate vectors used as metrics and weight functions. 
vectors = self.populate_vectors(obs) + # Construct Sigma (and factorize if 2d) self.compute(**vectors) # Compute likelihood @@ -56,7 +57,9 @@ def lnlike(self, pred, obs, vectors={}): def populate_vectors(self, obs, vectors={}): # update vectors vectors["mask"] = obs.mask - vectors["unc"] = obs.uncertainty + vectors["wavelength"] = obs.wavelength + vectors["uncertainty"] = obs.uncertainty + vectors["flux"] = obs.flux if obs.kind == "photometry": vectors["filternames"] = obs.filternames vectors["phot_samples"] = obs.get("phot_samples", None) @@ -81,7 +84,8 @@ def lnlikelihood(self, pred, data): class NoiseModelCov(NoiseModel): - """This object allows for 1d or 2d covariance matrices constructed from kernels + """This object allows for 1d or 2d covariance matrices constructed from + kernels. """ def __init__(self, frac_out_name="f_outlier", nsigma_out_name="nsigma_outlier", @@ -95,17 +99,6 @@ def __init__(self, frac_out_name="f_outlier", nsigma_out_name="nsigma_outlier", self.metric_name = metric_name self.mask_name = mask_name - def populate_vectors(self, vectors, obs): - # update vectors - vectors["mask"] = obs.mask - vectors["wavelength"] = obs.wavelength - vectors["unc"] = obs.uncertainty - vectors["flux"] = obs.flux - if obs.kind == "photometry": - vectors["filternames"] = obs.filternames - vectors["phot_samples"] = obs.get("phot_samples", None) - return vectors - def construct_covariance(self, **vectors): """Construct a covariance matrix from a metric, a list of kernel objects, and a list of weight vectors (of same length as the metric) From 78f23325b348b8f35174dcebad54f5ecbb12be03 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 16 Jan 2023 17:35:24 -0500 Subject: [PATCH 18/33] Fix imports and robustify metadata writing. 
--- prospect/io/write_results.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/prospect/io/write_results.py b/prospect/io/write_results.py index 765e41b3..ef6ce252 100644 --- a/prospect/io/write_results.py +++ b/prospect/io/write_results.py @@ -6,6 +6,7 @@ """ import os, time, warnings +from copy import deepcopy import pickle, json, base64 import numpy as np try: @@ -14,7 +15,6 @@ except(ImportError): _has_h5py_ = False - __all__ = ["githash", "write_hdf5", "chain_to_struct"] @@ -22,6 +22,23 @@ unserial = json.dumps('Unserializable') +class NumpyEncoder(json.JSONEncoder): + """ + """ + + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, type): + return str(obj) + if isinstance(obj, np.integer): + return int(obj) + if isinstance(obj, np.floating): + return float(obj) + + return json.JSONEncoder.default(self, obj) + + def pick(obj): """create a serialized object that can go into hdf5 in py2 and py3, and can be read by both """ @@ -32,7 +49,8 @@ def githash(**extras): """Pull out the git hash history for Prospector here. 
""" try: - from .._version import __version__, __githash__ + from .._version import __version__#, __githash__ + __githash__ = None bgh = __version__, __githash__ except(ImportError): warnings.warn("Could not obtain prospector version info", RuntimeWarning) @@ -139,7 +157,7 @@ def write_hdf5(hfile, run_params, model, obs, if obs["wavelength"] is None: best.create_dataset("restframe_wavelengths", data=sps.wavelengths) - # Store the githash last after flushing since getting it might cause an + # Store the githash last after flushing since getting it might cause an # uncatchable crash bgh = githash(**run_params) hf.attrs['prospector_version'] = json.dumps(bgh) @@ -154,7 +172,7 @@ def metadata(run_params, model, write_model_params=True): meta["model_params"] = deepcopy(model.params) for k, v in list(meta.items()): try: - meta[k] = json.dumps(v) + meta[k] = json.dumps(v, cls=NumpyEncoder) except(TypeError): meta[k] = pick(v) except: From 28770a74025e6f3462e3ccb75f15cbb44eaf2cc5 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 17 Jan 2023 22:30:36 -0500 Subject: [PATCH 19/33] fix bug in covariance construction; add option to median smooth before polynomial fitting; comment out broken bestfit model saving. 
--- prospect/io/write_results.py | 21 +++++++++++---------- prospect/likelihood/noise_model.py | 4 ++-- prospect/models/sedmodel.py | 10 +++++++++- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/prospect/io/write_results.py b/prospect/io/write_results.py index ef6ce252..0194902c 100644 --- a/prospect/io/write_results.py +++ b/prospect/io/write_results.py @@ -146,16 +146,17 @@ def write_hdf5(hfile, run_params, model, obs, # Best fitting model in space of data if sps is not None: if "sampling/chain" in hf: - from ..plotting.utils import best_sample - pbest = best_sample(hf["sampling"]) - spec, phot, mfrac = model.predict(pbest, obs=obs, sps=sps) - best = hf.create_group("bestfit") - best.create_dataset("spectrum", data=spec) - best.create_dataset("photometry", data=phot) - best.create_dataset("parameter", data=pbest) - best.attrs["mfrac"] = mfrac - if obs["wavelength"] is None: - best.create_dataset("restframe_wavelengths", data=sps.wavelengths) + pass + #from ..plotting.utils import best_sample + #pbest = best_sample(hf["sampling"]) + #spec, phot, mfrac = model.predict(pbest, obs=obs, sps=sps) + #best = hf.create_group("bestfit") + #best.create_dataset("spectrum", data=spec) + #best.create_dataset("photometry", data=phot) + #best.create_dataset("parameter", data=pbest) + #best.attrs["mfrac"] = mfrac + #if obs["wavelength"] is None: + # best.create_dataset("restframe_wavelengths", data=sps.wavelengths) # Store the githash last after flushing since getting it might cause an # uncatchable crash diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index 9da98abd..9fc49e77 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -65,8 +65,8 @@ def populate_vectors(self, obs, vectors={}): vectors["phot_samples"] = obs.get("phot_samples", None) return vectors - def construct_covariance(self, unc=[], mask=slice(None), **vectors): - self.Sigma = np.atleast_1d(unc[mask]**2) + def 
construct_covariance(self, uncertainty=[], mask=slice(None), **other_vectors): + self.Sigma = np.atleast_1d(uncertainty[mask]**2) def compute(self, **vectors): """Make a boring diagonal Covariance array diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 3d5d8871..786b735e 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -10,6 +10,7 @@ from numpy.polynomial.chebyshev import chebval, chebvander from scipy.interpolate import splrep, BSpline +from scipy.signal import medfilt from sedpy.observate import getSED from sedpy.smoothing import smoothspec @@ -714,7 +715,8 @@ class PolySpecModel(SpecModel): def _available_parameters(self): pars = [("polyorder", "order of the polynomial to fit"), - ("poly_regularization", "vector of length `polyorder` providing regularization for each polynomial term") + ("poly_regularization", "vector of length `polyorder` providing regularization for each polynomial term"), + ("median_polynomial", "if > 0, median smooth with a kernel of width order/range/median_polynomial before fitting") ] return pars @@ -756,6 +758,12 @@ def spec_calibration(self, theta=None, obs=None, spec=None, **kwargs): # masked wavelengths may have x>1, x<-1 x = self.wave_to_x(obs["wavelength"], mask) y = (obs['spectrum'] / spec)[mask] - 1.0 + + if self.params.get('median_polynomial', 0) > 0: + kernel_factor = self.params["median_polynomial"] + knl = int((x.max() - x.min()) / order / kernel_factor) + knl += int((knl % 2) == 0) + y = medfilt(y, knl) yerr = (obs['unc'] / spec)[mask] yvar = yerr**2 A = chebvander(x[mask], order) From b0f80a54c7afe2a36167eb6320a935403b8a89ad Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Thu, 19 Jan 2023 22:53:04 -0500 Subject: [PATCH 20/33] io and plotting; allow vector nebemlineinspec. 
--- prospect/data/observation.py | 80 +++++++----- prospect/io/read_results.py | 42 ++++--- prospect/io/write_results.py | 4 +- prospect/models/sedmodel.py | 3 +- prospect/plotting/utils.py | 41 +----- prospect/utils/{plotting.py => stats.py} | 153 ++++++++--------------- 6 files changed, 131 insertions(+), 192 deletions(-) rename prospect/utils/{plotting.py => stats.py} (61%) diff --git a/prospect/data/observation.py b/prospect/data/observation.py index 1361bd66..8d2c3bf5 100644 --- a/prospect/data/observation.py +++ b/prospect/data/observation.py @@ -9,8 +9,8 @@ from ..likelihood.noise_model import NoiseModel -__all__ = ["Observation", "Spectrum", "Photometry", - "from_oldstyle"] +__all__ = ["Observation", "Spectrum", "Photometry", "Lines" + "from_oldstyle", "from_serial", "obstypes"] class NumpyEncoder(json.JSONEncoder): @@ -37,8 +37,8 @@ class Observation: logify_spectrum = False alias = {} - meta = ["kind", "name"] - data = ["wavelength", "flux", "uncertainty", "mask"] + _meta = ["kind", "name"] + _data = ["wavelength", "flux", "uncertainty", "mask"] def __init__(self, flux=None, @@ -126,18 +126,20 @@ def ndata(self): else: return len(self.wavelength) - def to_json(self): - obs = {m: getattr(self, m) for m in self.meta + self.data} - serial = json.dumps(obs, cls=NumpyEncoder) - return serial + @property + def metadata(self): + meta = {m: getattr(self, m) for m in self._meta} + if "filternames" in meta: + meta["filters"] = ",".join(meta["filternames"]) + return meta def to_struct(self, data_dtype=np.float32): """Convert data to a structured array """ self._automask() - dtype = np.dtype([(c, data_dtype) for c in self.data]) + dtype = np.dtype([(c, data_dtype) for c in self._data]) struct = np.zeros(self.ndata, dtype=dtype) - for c in self.data: + for c in self._data: data = getattr(self, c) try: struct[c] = data @@ -146,31 +148,27 @@ def to_struct(self, data_dtype=np.float32): return struct def to_fits(self, filename=""): - """ - """ from astropy.io import fits 
hdus = fits.HDUList([fits.PrimaryHDU(), fits.BinTableHDU(self.to_struct())]) - meta = {m: getattr(self, m) for m in self.meta} - if "filternames" in meta: - meta["filters"] = ",".join(meta["filternames"]) - for k, v in meta.items(): - try: - for hdu in hdus: - hdu.header[k] = v - except(ValueError): - pass + for hdu in hdus: + hdu.header.update(self.metadata) if filename: hdus.writeto(filename, overwrite=True) hdus.close() def to_h5_dataset(self, handle): dset = handle.create_dataset(self.name, data=self.to_struct()) - for m in self.meta: - try: - dset.attr[m] = getattr(self, m) - except: - pass + dset.attrs.update(self.metadata) + + def to_json(self): + obs = {m: getattr(self, m) for m in self._meta + self._data} + serial = json.dumps(obs, cls=NumpyEncoder) + return serial + + @property + def to_nJy(self): + return 1e9 * 3631 class Photometry(Observation): @@ -180,7 +178,7 @@ class Photometry(Observation): maggies_unc="uncertainty", filters="filters", phot_mask="mask") - meta = ["kind", "name", "filternames"] + _meta = ["kind", "name", "filternames"] def __init__(self, filters=[], name="PhotA", **kwargs): """On Observation object that holds photometric data @@ -203,16 +201,20 @@ def __init__(self, filters=[], name="PhotA", **kwargs): super(Photometry, self).__init__(name=name, **kwargs) def set_filters(self, filters): - if not filters: + if len(filters) == 0: self.filters = filters self.filternames = [] self.filterset = None return - if type(filters[0]) is str: - self.filternames = filters - else: + try: self.filternames = [f.name for f in filters] + except(AttributeError): + self.filternames = filters + #if type(filters[0]) is str: + # self.filternames = filters + #else: + # self.filternames = [f.name for f in filters] self.filterset = FilterSet(self.filternames) # filters on the gridded resolution @@ -367,6 +369,11 @@ def __init__(self, self.line_ind = np.array(line_ind).as_type(int) +obstypes = dict(photometry=Photometry, + spectrum=Spectrum, + lines=Lines) + + 
def from_oldstyle(obs, **kwargs): """Convert from an oldstyle dictionary to a list of observations """ @@ -375,3 +382,14 @@ def from_oldstyle(obs, **kwargs): #[o.rectify() for o in obslist] return [spec, phot] + + +def from_serial(arr, meta): + adict = {a:arr[a] for a in arr.dtype.names} + adict["name"] = meta.get("name", "") + if 'filters' in meta: + adict["filters"] = meta["filters"].split(",") + obs = obstypes[meta["kind"]](**adict) + #[setattr(obs, m, v) for m, v in meta.items()] + return obs + diff --git a/prospect/io/read_results.py b/prospect/io/read_results.py index 71508841..40d9d789 100644 --- a/prospect/io/read_results.py +++ b/prospect/io/read_results.py @@ -69,19 +69,17 @@ def results_from(filename, model_file=None, dangerous=True, **kwargs): """ # Read the basic chain, parameter, and run_params info - res = read_hdf5(filename, **kwargs) + res, obs = read_hdf5(filename, **kwargs) # Now try to instantiate the model object from the paramfile - param_file = (res['run_params'].get('param_file', ''), - res.get("paramfile_text", '')) if dangerous: try: model = get_model(res) except: model = None - res['model'] = model + #res['model'] = model - return res, res["obs"], model + return res, obs, model def emcee_restarter(restart_from="", niter=32, **kwargs): @@ -151,8 +149,9 @@ def read_hdf5(filename, **extras): :param filename: Name of the HDF5 file. 
""" - groups = {"sampling": {}, "obs": {}, - "bestfit": {}, "optimization": {}} + groups = {"sampling": {}, + "bestfit": {}, + "optimization": {}} res = {} with h5py.File(filename, "r") as hf: # loop over the groups @@ -184,17 +183,23 @@ def read_hdf5(filename, **extras): res.update(groups['sampling']) res["bestfit"] = groups["bestfit"] res["optimization"] = groups["optimization"] - res['obs'] = groups['obs'] - try: - res['obs']['filters'] = load_filters([str(f) for f in res['obs']['filters']]) - except: - pass - try: - res['rstate'] = unpick(res['rstate']) - except: - pass + if 'observations' in hf: + obs = obs_from_h5(hf['observations']) + else: + obs = None + #res['obs'] = obs + + return res, obs + - return res +def obs_from_h5(obsgroup): + from ..data.observation import from_serial + observations = [] + for obsname, dset in obsgroup.items(): + arr, meta = dset[:], dict(dset.attrs) + obs = from_serial(arr, meta) + observations.append(obs) + return observations def get_sps(res): @@ -258,8 +263,7 @@ def get_model(res): A prospect.models.SedModel object """ import os - param_file = (res['run_params'].get('param_file', ''), - res.get("paramfile_text", '')) + param_file = ("prospar", res.get("paramfile_text", '')) path, filename = os.path.split(param_file[0]) modname = filename.replace('.py', '') user_module = import_module_from_string(param_file[1], modname) diff --git a/prospect/io/write_results.py b/prospect/io/write_results.py index 0194902c..748cd1f5 100644 --- a/prospect/io/write_results.py +++ b/prospect/io/write_results.py @@ -126,7 +126,7 @@ def write_hdf5(hfile, run_params, model, obs, # High level parameter and version info meta = metadata(run_params, model, write_model_params=write_model_params) for k, v in meta.items(): - hf.attrs[k] = k + hf.attrs[k] = v hf.flush() # ----------------- @@ -232,7 +232,7 @@ def write_obs_to_h5(hf, obslist): """Write observational data to the hdf5 file """ try: - odat = hf.create_group('obs') + odat = 
hf.create_group('observations') except(ValueError): # We already have an 'obs' group return diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 786b735e..73eb65b3 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -340,7 +340,7 @@ def init_eline_info(self, eline_file='emlines_info.dat'): @property def _need_lines(self): - return (not (bool(self.params.get("nebemlineinspec", True)))) + return (not (bool(np.any(self.params.get("nebemlineinspec", True))))) @property def _want_lines(self): @@ -430,6 +430,7 @@ def cache_eline_parameters(self, obs, nsigma=5, forcelines=False): # exit gracefully if not adding lines. We also exit if only fitting # photometry, for performance reasons hasspec = obs.get('spectrum', None) is not None + #hasspec = True if not (self._want_lines & self._need_lines & hasspec): self._fit_eline_pixelmask = np.array([], dtype=bool) self._fix_eline_pixelmask = np.array([], dtype=bool) diff --git a/prospect/plotting/utils.py b/prospect/plotting/utils.py index f5c13ad2..df08c489 100644 --- a/prospect/plotting/utils.py +++ b/prospect/plotting/utils.py @@ -4,9 +4,10 @@ import numpy as np from .corner import _quantile from ..models.priors import TopHat as Uniform +from ..utils.stats import get_best, best_sample - -__all__ = ["get_best", "best_sample", "get_simple_prior", "sample_prior", "sample_posterior", +__all__ = ["get_best", "best_sample", + "get_simple_prior", "sample_prior", "sample_posterior", "boxplot", "violinplot", "step"] @@ -92,42 +93,6 @@ def sample_posterior(chain, weights=None, nsample=int(1e4), return flatchain[inds, :], extra[inds, ...] 
-def get_best(res, **kwargs): - """Get the maximum a posteriori parameters and their names - - :param res: - A ``results`` dictionary with the keys 'lnprobability', 'chain', and - 'theta_labels' - - :returns theta_names: - List of strings giving the names of the parameters, of length ``ndim`` - - :returns best: - ndarray with shape ``(ndim,)`` of parameter values corresponding to the - sample with the highest posterior probaility - """ - theta_best = best_sample(res) - - try: - theta_names = res["theta_labels"] - except(KeyError): - theta_names = res["model"].theta_labels() - return theta_names, theta_best - - -def best_sample(res): - """Get the posterior sample with the highest posterior probability. - """ - imax = np.argmax(res['lnprobability']) - # there must be a more elegant way to deal with differnt shapes - try: - i, j = np.unravel_index(imax, res['lnprobability'].shape) - theta_best = res['chain'][i, j, :].copy() - except(ValueError): - theta_best = res['chain'][imax, :].copy() - return theta_best - - def violinplot(data, pos, widths, ax=None, violin_kwargs={"showextrema": False}, color="slateblue", alpha=0.5, span=None, **extras): diff --git a/prospect/utils/plotting.py b/prospect/utils/stats.py similarity index 61% rename from prospect/utils/plotting.py rename to prospect/utils/stats.py index 9c1940e0..91d24dd2 100644 --- a/prospect/utils/plotting.py +++ b/prospect/utils/stats.py @@ -7,13 +7,49 @@ except(ImportError): pass -from ..plotting.utils import get_best __all__ = ["get_best", "get_truths", "get_percentiles", "get_stats", "posterior_samples", "hist_samples", "joint_pdf", "compute_sigma_level", "trim_walkers", "fill_between", "figgrid"] +def flatstruct(struct): + params = struct.dtype.names + m = [struct[s] for s in params] + return np.concatenate(m), params + + +def get_best(res, **kwargs): + """Get the maximum a posteriori parameters and their names + + :param res: + A ``results`` dictionary with the keys 'lnprobability', 'chain', and + 
'theta_labels' + + :returns theta_names: + List of strings giving the names of the parameters, of length ``ndim`` + + :returns best: + ndarray with shape ``(ndim,)`` of parameter values corresponding to the + sample with the highest posterior probaility + """ + qbest, qnames = flatstruct(best_sample(res)) + return qnames, qbest + + +def best_sample(res): + """Get the posterior sample with the highest posterior probability. + """ + imax = np.argmax(res['lnprobability']) + # there must be a more elegant way to deal with differnt shapes + try: + i, j = np.unravel_index(imax, res['lnprobability'].shape) + Qbest = res['chain'][i, j].copy() + except(ValueError): + Qbest = res['chain'][imax].copy() + return Qbest + + def get_truths(res): import pickle try: @@ -48,16 +84,22 @@ def get_percentiles(res, ptile=[16, 50, 84], start=0.0, thin=1, **extras): requested percentiles for that parameter. """ - parnames = np.array(res.get('theta_labels', res['model'].theta_labels())) - niter = res['chain'].shape[-2] + chaincat = res["chain"] + niter = res['chain'].shape[0] + parnames = chaincat.dtype.names + weights = res.get("weights", None) + + start_index = np.floor(start * (niter-1)).astype(int) - if res["chain"].ndim > 2: - flatchain = res['chain'][:, start_index::thin, :] - dims = flatchain.shape - flatchain = flatchain.reshape(dims[0]*dims[1], dims[2]) - elif res["chain"].ndim == 2: - flatchain = res["chain"][start_index::thin, :] - pct = np.array([quantile(p, ptile, weights=res.get("weights", None)) for p in flatchain.T]) + if res["chain"].ndim > 1: + flatchain = res['chain'][:, start_index::thin] + flatchain = flatchain.reshape(-1) + elif res["chain"].ndim == 1: + flatchain = res["chain"][start_index::thin] + chain = flatstruct(chaincat) + + pct = [quantile(x, ptile, weights=weights, axis=0) + for x in chain.T] return dict(zip(parnames, pct)) @@ -111,38 +153,6 @@ def trim_walkers(res, threshold=-1e4): return trimmed -def joint_pdf(res, p1, p2, pmap={}, **kwargs): - """Build a 
2-dimensional array representing the binned joint PDF of 2 - parameters, in terms of sigma or fraction of the total distribution. - - For example, to plot contours of the joint PDF of parameters ``"parname1"`` - and ``"parname2"`` from the last half of a chain with 30bins in each - dimension; - - :: - - xb, yb, sigma = joint_pdf(res, parname1, parname2, nbins=30, start=0.5) - ax.contour(xb, yb, sigma, **plotting_kwargs) - - :param p1: - The name of the parameter for the x-axis - - :param p2: - The name of the parameter for the y axis - - :returns xb, yb, sigma: - The bins and the 2-d histogram - """ - trace, pars = hist_samples(res, [p1, p2], **kwargs) - trace = trace.copy().T - if pars[0] == p1: - trace = trace[::-1, :] - x = pmap.get(p2, lambda x: x)(trace[0]) - y = pmap.get(p1, lambda x: x)(trace[1]) - xbins, ybins, sigma = compute_sigma_level(x, y, **kwargs) - return xbins, ybins, sigma.T - - def posterior_samples(res, nsample=None, **kwargs): """Pull samples of theta from the MCMC chain @@ -211,64 +221,5 @@ def hist_samples(res, showpars=None, start=0, thin=1, return flatchain, parnames[ind_show] -def compute_sigma_level(trace1, trace2, nbins=30, weights=None, extents=None, **extras): - """From a set of traces in two parameters, make a 2-d histogram of number - of standard deviations. Following examples from J Vanderplas. 
- """ - L, xbins, ybins = np.histogram2d(trace1, trace2, bins=nbins, - weights=weights, - range=extents) - L[L == 0] = 1E-16 - logL = np.log(L) - - shape = L.shape - L = L.ravel() - - # obtain the indices to sort and unsort the flattened array - i_sort = np.argsort(L)[::-1] - i_unsort = np.argsort(i_sort) - - L_cumsum = L[i_sort].cumsum() - L_cumsum /= L_cumsum[-1] - - xbins = 0.5 * (xbins[1:] + xbins[:-1]) - ybins = 0.5 * (ybins[1:] + ybins[:-1]) - - return xbins, ybins, L_cumsum[i_unsort].reshape(shape) - - -def figgrid(ny, nx, figsize=None, left=0.1, right=0.85, - top=0.9, bottom=0.1, wspace=0.2, hspace=0.10): - """Gridpars is - left, right - """ - from matplotlib import gridspec - if figsize is None: - figsize = (nx*4.5, ny*3) - fig = pl.figure(figsize=figsize) - axarray = np.zeros([ny, nx], dtype=np.dtype('O')) - gs1 = gridspec.GridSpec(ny, nx) - gs1.update(left=left, right=right, top=top, bottom=bottom, - wspace=wspace, hspace=hspace) - for i in range(ny): - for j in range(nx): - axarray[i, j] = fig.add_subplot(gs1[i, j]) - return fig, axarray - - -def fill_between(x, y1, y2=0, ax=None, **kwargs): - """Plot filled region between `y1` and `y2`. - - This function works exactly the same as matplotlib's fill_between, except - that it also plots a proxy artist (specifically, a rectangle of 0 size) - so that it can be added it appears on a legend. - """ - ax = ax if ax is not None else pl.gca() - ax.fill_between(x, y1, y2, **kwargs) - p = pl.Rectangle((0, 0), 0, 0, **kwargs) - ax.add_patch(p) - return p - - def logify(x): return np.log10(x) From 3f419d131e4747776ab11e6fb4d4566f493a2c65 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 11 Apr 2023 10:50:11 -0400 Subject: [PATCH 21/33] Rename prospect.data -> prospect.observation. Also slight change to instrumental smoothing. 
--- prospect/data/__init__.py | 6 ------ prospect/models/sedmodel.py | 15 ++++++++------- prospect/observation/__init__.py | 6 ++++++ prospect/{data => observation}/observation.py | 15 ++++++++------- prospect/{data => observation}/obsutils.py | 0 pyproject.toml | 2 +- tests/test_agn_eline.py | 2 +- tests/test_eline.py | 2 +- 8 files changed, 25 insertions(+), 23 deletions(-) delete mode 100644 prospect/data/__init__.py create mode 100644 prospect/observation/__init__.py rename prospect/{data => observation}/observation.py (95%) rename prospect/{data => observation}/obsutils.py (100%) diff --git a/prospect/data/__init__.py b/prospect/data/__init__.py deleted file mode 100644 index f4bb7dc4..00000000 --- a/prospect/data/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -from .observation import Photometry, Spectrum, from_oldstyle - -__all__ = ["Photometry", "Spectrum", - "from_oldstyle"] diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 73eb65b3..4a077822 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -72,7 +72,7 @@ def predict(self, theta, observations=None, sps=None, **extras): theta : ndarray of shape ``(ndim,)`` Vector of free model parameter values. - observations : A list of `Observation` instances. + observations : A list of `Observation` instances (e.g. 
instance of ) The data to predict sps : @@ -104,7 +104,7 @@ def predict(self, theta, observations=None, sps=None, **extras): self._wave, self._spec, self._mfrac = sps.get_galaxy_spectrum(**self.params) self._zred = self.params.get('zred', 0) self._eline_wave, self._eline_lum = sps.get_galaxy_elines() - self._library_resolution = getattr(sps, "spectral_resolution", 0.0) + self._library_resolution = getattr(sps, "spectral_resolution", 0.0) # restframe # Flux normalize self._norm_spec = self._spec * self.flux_norm() @@ -184,10 +184,11 @@ def predict_spec(self, obs, **extras): self.cache_eline_parameters(obs) # --- smooth and put on output wavelength grid --- - # physical smoothing + # Physical smoothing of the whole spectrum smooth_spec = self.velocity_smoothing(obs_wave, self._norm_spec) - # instrumental smoothing (accounting for library resolution) - smooth_spec = obs.instrumental_smoothing(self._outwave, smooth_spec, + # Instrumental smoothing (accounting for library resolution) + # put onto the spec.wavelength grid + smooth_spec = obs.instrumental_smoothing(obs_wave, smooth_spec, libres=self._library_resolution) # --- add fixed lines if necessary --- @@ -640,8 +641,8 @@ def velocity_smoothing(self, wave, spec): """Smooth the spectrum. See :py:func:`prospect.utils.smoothing.smoothspec` for details. 
""" - sigma = self.params.get("sigma_smooth", 100) - outspec = smoothspec(wave, spec, sigma, outwave=self._outwave, + sigma = self.params.get("sigma_smooth", 300) + outspec = smoothspec(wave, spec, sigma, outwave=wave, smoothtype="vel", fft=True) return outspec diff --git a/prospect/observation/__init__.py b/prospect/observation/__init__.py new file mode 100644 index 00000000..cc72dfc9 --- /dev/null +++ b/prospect/observation/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + +from .observation import Photometry, Spectrum, Lines, from_oldstyle + +__all__ = ["Photometry", "Spectrum", "Lines", + "from_oldstyle"] diff --git a/prospect/data/observation.py b/prospect/observation/observation.py similarity index 95% rename from prospect/data/observation.py rename to prospect/observation/observation.py index 8d2c3bf5..1366c396 100644 --- a/prospect/data/observation.py +++ b/prospect/observation/observation.py @@ -280,31 +280,32 @@ def __init__(self, self.calibration = calibration self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) - def instrumental_smoothing(self, obswave, influx, libres=0): + def instrumental_smoothing(self, obswave, influx, zred=0, libres=0): """Smooth a spectrum by the instrumental resolution, optionally accounting (in quadrature) the intrinsic library resolution. Parameters ---------- - obswave : ndarray - Observed frame wavelengths, in units of AA + obswave : ndarray of shape (N_pix_model,) + Observed frame wavelengths, in units of AA for the model - influx : ndarray + influx : ndarray of shape (N_pix_model,) Flux array - libres : float or ndarray + libres : float or ndarray of shape (N_pix_model,) Library resolution in units of km/s (dispersion) to be subtracted from the smoothing kernel. 
+ This should be in the observed frame and *on the same wavelength grid as obs.wavelength* Returns ------- - outflux : ndarray + outflux : ndarray of shape (ndata,) If instrument resolution is not None, this is the smoothed flux on the observed ``wavelength`` grid. If resolution is None, this just passes ``influx`` right back again. """ if self.resolution is None: # no-op - return influx + return np.interp(self.wavelength, obswave, influx) if libres: kernel = np.sqrt(self.resolution**2 - libres**2) diff --git a/prospect/data/obsutils.py b/prospect/observation/obsutils.py similarity index 100% rename from prospect/data/obsutils.py rename to prospect/observation/obsutils.py diff --git a/pyproject.toml b/pyproject.toml index 56035247..26500144 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ test = ["pytest", "pytest-xdist"] [tool.setuptools] packages = ["prospect", - "prospect.models", "prospect.sources", "prospect.data", + "prospect.models", "prospect.sources", "prospect.observation", "prospect.likelihood", "prospect.fitting", "prospect.io", "prospect.plotting", "prospect.utils"] diff --git a/tests/test_agn_eline.py b/tests/test_agn_eline.py index 84f8afe7..8c173839 100644 --- a/tests/test_agn_eline.py +++ b/tests/test_agn_eline.py @@ -5,7 +5,7 @@ import numpy as np from sedpy import observate -from prospect.data.observation import Spectrum, Photometry +from prospect.observation import Spectrum, Photometry from prospect.models.sedmodel import AGNSpecModel from prospect.models.templates import TemplateLibrary from prospect.sources import CSPSpecBasis diff --git a/tests/test_eline.py b/tests/test_eline.py index 64c409c1..293d7e70 100644 --- a/tests/test_eline.py +++ b/tests/test_eline.py @@ -7,7 +7,7 @@ from sedpy import observate -from prospect.data import Photometry, Spectrum, from_oldstyle +from prospect.observation import Photometry, Spectrum, from_oldstyle from prospect.models.templates import TemplateLibrary from prospect.models.sedmodel import 
SpecModel from prospect.sources import CSPSpecBasis From cf90515e94ac55a99229f5222ecbe120cce1c96d Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 11 Apr 2023 16:33:27 -0400 Subject: [PATCH 22/33] fix tests; put weight vector names in the kernels; add a NoiseModel1D class for uncorrelated kernels (like jitter). --- prospect/likelihood/kernels.py | 5 ++- prospect/likelihood/noise_model.py | 62 +++++++++++++++++++----------- prospect/models/sedmodel.py | 2 +- tests/test_predict.py | 2 +- 4 files changed, 45 insertions(+), 26 deletions(-) diff --git a/prospect/likelihood/kernels.py b/prospect/likelihood/kernels.py index 7e77e513..f4379c53 100644 --- a/prospect/likelihood/kernels.py +++ b/prospect/likelihood/kernels.py @@ -6,7 +6,7 @@ class Kernel(object): - def __init__(self, parnames=[], name=''): + def __init__(self, parnames=[], weight_by=None, name=''): """ :param parnames: A list of names of the kernel params, used to alias the intrinsic @@ -19,6 +19,7 @@ def __init__(self, parnames=[], name=''): self.param_alias = dict(zip(self.kernel_params, parnames)) self.params = {} self.name = name + self.weight_by = weight_by def __repr__(self): return '{}({})'.format(self.__class__, self.param_alias.items()) @@ -31,7 +32,7 @@ def update(self, **kwargs): for k in self.kernel_params: self.params[k] = kwargs[self.param_alias[k]] - def __call__(self, metric, weights=None, ndim=2, **extras): + def __call__(self, metric, weights=None, ndim=2): """Return a covariance matrix, given a metric. Optionally, multiply the output kernel by a weight function to induce non-stationarity. 
""" diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index 9fc49e77..f1585ce6 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -7,7 +7,7 @@ except(ImportError): pass -__all__ = ["NoiseModel", "NoiseModelCov", "NoiseModelKDE"] +__all__ = ["NoiseModel", "NoiseModel1D", "NoiseModelCov", "NoiseModelKDE"] class NoiseModel: @@ -19,7 +19,8 @@ class NoiseModel: f_outlier = 0 n_sigma_outlier = 50 - def __init__(self, frac_out_name="f_outlier", nsigma_out_name="nsigma_outlier"): + def __init__(self, frac_out_name="f_outlier", + nsigma_out_name="nsigma_outlier"): self.frac_out_name = frac_out_name self.nsigma_out_name = nsigma_out_name self.kernels = [] @@ -83,7 +84,40 @@ def lnlikelihood(self, pred, data): return lnp.sum() -class NoiseModelCov(NoiseModel): +class NoiseModel1D(NoiseModel): + """This class allows for 1D (diagonal) kernels + """ + + # TODO: metric names should be the responsibility of kernels, not noise models + def __init__(self, frac_out_name="f_outlier", + nsigma_out_name="nsigma_outlier", + metric_name='', + mask_name='mask', + kernels=[]): + self.frac_out_name = frac_out_name + self.nsigma_out_name = nsigma_out_name + self.kernels = kernels + self.metric_name = metric_name + self.mask_name = mask_name + + def construct_covariance(self, **vectors): + """Construct a covariance matrix from a metric, a list of kernel + objects, and a list of weight vectors (of same length as the metric) + """ + metric = vectors[self.metric_name] + mask = vectors.get(self.mask_name, slice(None)) + + # 1 = uncorrelated errors, 2 = covariance matrix, >2 undefined + ndmax = 1 + Sigma = np.zeros(metric[mask].shape[0]) + + for kernel in self.kernels: + wght = vectors.get(kernel.weight_by, None) + Sigma += kernel(metric[mask], weights=wght[mask], ndim=ndmax) + return Sigma + + +class NoiseModelCov(NoiseModel1D): """This object allows for 1d or 2d covariance matrices constructed from kernels. 
""" @@ -110,27 +144,11 @@ def construct_covariance(self, **vectors): ndmax = np.array([k.ndim for k in self.kernels]).max() Sigma = np.zeros(ndmax * [metric[mask].shape[0]]) - weight_vectors = self.get_weights(**vectors) - for i, (kernel, wght) in enumerate(zip(self.kernels, weight_vectors)): - Sigma += kernel(metric[mask], weights=wght, ndim=ndmax) + for kernel in self.kernels: + wght = vectors.get(kernel.weight_by, None) + Sigma += kernel(metric[mask], weights=wght[mask], ndim=ndmax) return Sigma - def get_weights(self, **vectors): - """From a dictionary of vectors that give weights, pull the vectors - that correspond to each kernel, as stored in the `weight_names` - attribute. A None vector will result in None weights - """ - mask = vectors.get(self.mask_name, slice(None)) - wghts = [] - for w in self.weight_names: - if w is None: - wghts += [None] - elif vectors[w] is None: - wghts += [None] - else: - wghts.append(vectors[w][mask]) - return wghts - def compute(self, check_finite=False, **vectors): """Build and cache the covariance matrix, and if it is 2-d factorize it and cache that. Also cache ``log_det``. diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 4a077822..8890c0e3 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -187,7 +187,7 @@ def predict_spec(self, obs, **extras): # Physical smoothing of the whole spectrum smooth_spec = self.velocity_smoothing(obs_wave, self._norm_spec) # Instrumental smoothing (accounting for library resolution) - # put onto the spec.wavelength grid + # Put onto the spec.wavelength grid. 
smooth_spec = obs.instrumental_smoothing(obs_wave, smooth_spec, libres=self._library_resolution) diff --git a/tests/test_predict.py b/tests/test_predict.py index 73ff5c16..9fd74853 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -9,7 +9,7 @@ from sedpy.observate import load_filters from prospect.sources import CSPSpecBasis from prospect.models import SpecModel, templates -from prospect.data import Spectrum, Photometry +from prospect.observation import Spectrum, Photometry @pytest.fixture(scope="module") From 3f96e55d8b85c613bd5c32e5f1d2152928044064 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Tue, 11 Apr 2023 22:17:32 -0400 Subject: [PATCH 23/33] Work on instrumental smoothing. --- prospect/models/sedmodel.py | 25 +++++---- prospect/observation/observation.py | 83 +++++++++++++++++++++++------ tests/test_agn_eline.py | 1 - tests/test_predict.py | 2 + 4 files changed, 85 insertions(+), 26 deletions(-) diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 8890c0e3..022b4cd8 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -116,6 +116,9 @@ def predict(self, theta, observations=None, sps=None, **extras): self._ln_eline_penalty = 0 self._eline_lum_var = np.zeros_like(self._eline_wave) + # physical velocity smoothing of the whole UV/NIR spectrum + self._smooth_spec = self.velocity_smoothing(self._wave, self._norm_spec) + # generate predictions for likelihood # this assumes all spectral datasets (if present) occur first # because they can change the line strengths during marginalization. @@ -184,12 +187,10 @@ def predict_spec(self, obs, **extras): self.cache_eline_parameters(obs) # --- smooth and put on output wavelength grid --- - # Physical smoothing of the whole spectrum - smooth_spec = self.velocity_smoothing(obs_wave, self._norm_spec) # Instrumental smoothing (accounting for library resolution) # Put onto the spec.wavelength grid. 
- smooth_spec = obs.instrumental_smoothing(obs_wave, smooth_spec, - libres=self._library_resolution) + inst_spec = obs.instrumental_smoothing(obs_wave, self._smooth_spec, + libres=self._library_resolution) # --- add fixed lines if necessary --- emask = self._fix_eline_pixelmask @@ -198,11 +199,11 @@ def predict_spec(self, obs, **extras): espec = self.predict_eline_spec(line_indices=inds, wave=self._outwave[emask]) self._fix_eline_spec = espec - smooth_spec[emask] += self._fix_eline_spec.sum(axis=1) + inst_spec[emask] += self._fix_eline_spec.sum(axis=1) # --- calibration --- - self._speccal = self.spec_calibration(obs=obs, spec=smooth_spec, **extras) - calibrated_spec = smooth_spec * self._speccal + self._speccal = self.spec_calibration(obs=obs, spec=inst_spec, **extras) + calibrated_spec = inst_spec * self._speccal # --- fit and add lines if necessary --- emask = self._fit_eline_pixelmask @@ -642,8 +643,12 @@ def velocity_smoothing(self, wave, spec): for details. """ sigma = self.params.get("sigma_smooth", 300) - outspec = smoothspec(wave, spec, sigma, outwave=wave, - smoothtype="vel", fft=True) + sel = (wave > 1.2e3) & (wave < 2.5e4) + # TODO: make a fast version of this that is also accurate + sm = smoothspec(wave, spec, sigma, outwave=wave[sel], + smoothtype="vel", fftsmooth=True) + outspec = spec.copy() + outspec[sel] = sm return outspec @@ -944,6 +949,8 @@ def predict_spec(self, obs, **extras): # --- smooth and put on output wavelength grid --- smooth_spec = self.velocity_smoothing(obs_wave, self._norm_spec) + smooth_spec = obs.instrumental_smoothing(obs_wave, smooth_spec, + libres=self._library_resolution) # --- add fixed lines --- assert self.params["nebemlineinspec"] == False, "must add agn and nebular lines within prospector" diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 1366c396..26a4bc98 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -4,7 +4,7 @@ import numpy as 
np from sedpy.observate import FilterSet -from sedpy.smoothing import smoothspec +from sedpy.smoothing import smoothspec, smooth_fft from ..likelihood.noise_model import NoiseModel @@ -13,6 +13,8 @@ "from_oldstyle", "from_serial", "obstypes"] +CKMS = 2.998e5 + class NumpyEncoder(json.JSONEncoder): def default(self, obj): @@ -126,6 +128,14 @@ def ndata(self): else: return len(self.wavelength) + @property + def wave_min(self): + return np.min(self.wavelength) + + @property + def wave_max(self): + return np.max(self.wavelength) + @property def metadata(self): meta = {m: getattr(self, m) for m in self._meta} @@ -201,7 +211,7 @@ def __init__(self, filters=[], name="PhotA", **kwargs): super(Photometry, self).__init__(name=name, **kwargs) def set_filters(self, filters): - if len(filters) == 0: + if (len(filters) == 0) or (filters is None): self.filters = filters self.filternames = [] self.filterset = None @@ -279,8 +289,40 @@ def __init__(self, self.resolution = resolution self.calibration = calibration self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) - - def instrumental_smoothing(self, obswave, influx, zred=0, libres=0): + assert np.all(np.diff(self.wavelength) > 0) + self.pad_wavelength_array() + + def pad_wavelength_array(self, lambda_pad=100): + #wave_min = self.wave_min * (1 - np.arange(npad, 0, -1) * Kdelta[0] / ckms) + low_pad = np.arange(lambda_pad, 1, (self.wavelength[0]-self.wavelength[1])) + hi_pad = np.arange(1, lambda_pad, (self.wavelength[-1]-self.wavelength[-2])) + wave_min = self.wave_min - low_pad + wave_max = self.wave_max + hi_pad + self.padded_wavelength = np.concatenate([wave_min, self.wavelength, wave_max]) + self.padded_resolution = np.interp(self.padded_wavelength, self.wavelength, self.resolution) + self._unpadded_inds = slice(len(low_pad), -len(hi_pad)) + + def smooth_lsf_fft(self, inwave, influx, outwave, sigma): + dw = np.gradient(outwave) + sigma_per_pixel = (dw / sigma) + cdf = np.cumsum(sigma_per_pixel) + cdf /= 
cdf.max() + # check: do we need this? + x_per_pixel = np.gradient(cdf) + x_per_sigma = np.nanmedian(x_per_pixel / sigma_per_pixel) + pix_per_sigma = 1 + N = pix_per_sigma / x_per_sigma + nx = int(2**np.ceil(np.log2(N))) + # now evenly sample in the x coordinate + x = np.linspace(0, 1, nx) + dx = 1.0 / nx + lam = np.interp(x, cdf, outwave) + newflux = np.interp(lam, inwave, influx) + flux_conv = smooth_fft(dx, newflux, x_per_sigma) + outflux = np.interp(outwave, lam, flux_conv) + return outflux + + def instrumental_smoothing(self, wave_obs, influx, zred=0, libres=0): """Smooth a spectrum by the instrumental resolution, optionally accounting (in quadrature) the intrinsic library resolution. @@ -303,18 +345,27 @@ def instrumental_smoothing(self, obswave, influx, zred=0, libres=0): the observed ``wavelength`` grid. If resolution is None, this just passes ``influx`` right back again. """ - if self.resolution is None: - # no-op - return np.interp(self.wavelength, obswave, influx) - - if libres: - kernel = np.sqrt(self.resolution**2 - libres**2) - else: - kernel = self.resolution - out = smoothspec(obswave, influx, kernel, - outwave=self.wavelength, - **self.instrument_smoothing_parameters) - return out + # interpolate library resolution onto the instrumental wavelength grid + Klib = np.interp(self.padded_wavelength, wave_obs, libres) + # quadrature difference of instrumental and library reolution + Kdelta = np.sqrt(self.padded_resolution**2 - Klib**2) + Kdelta_lambda = Kdelta / CKMS * self.padded_wavelength + + outspec_padded = self.smooth_lsf_fft(wave_obs, + influx, + self.padded_wavelength, + Kdelta_lambda) + if False: + warr = [wave_min] + while warr[-1] < wave_max: + w = warr[-1] + dv = np.interp(w, self.wavelength, Kdelta) + warr.append((1 + dv / ckms) * w) + warr = np.array(warr) + flux_resampled = np.interp(warr, wave_obs, influx) + np.convolve(flux_resampled, ) + + return outspec_padded[self._unpadded_inds] def to_oldstyle(self): obs = {} diff --git 
a/tests/test_agn_eline.py b/tests/test_agn_eline.py index 8c173839..1cd5a61e 100644 --- a/tests/test_agn_eline.py +++ b/tests/test_agn_eline.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from copy import deepcopy import numpy as np from sedpy import observate diff --git a/tests/test_predict.py b/tests/test_predict.py index 9fd74853..e2323cfd 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -58,7 +58,9 @@ def test_prediction_nodata(build_sps): sobs.uncertainty = None pred, mfrac = model.predict(model.theta, observations=[sobs, pobs], sps=sps) assert len(pred[0]) == len(sps.wavelengths) + assert np.any(np.isfinite(pred[0])) assert len(pred[1]) == len(pobs.filterset) + assert np.any(np.isfinite(pred[1])) def test_multispec(build_sps): From daf4e034e1f5672dce552902fd04c6740916b6f6 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Fri, 7 Jul 2023 15:48:09 -0400 Subject: [PATCH 24/33] Add instrumental resolution to emission linewidths; Treat case of dummy observation that is not any of the recognized kinds; Bump python version requirement. 
--- prospect/models/sedmodel.py | 9 ++++++++- prospect/observation/__init__.py | 2 +- prospect/sources/galaxy_basis.py | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 022b4cd8..e36a20af 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -133,6 +133,8 @@ def predict_obs(self, obs): prediction = self.predict_lines(obs) elif obs.kind == "photometry": prediction = self.predict_phot(obs.filterset) + else: + prediction = None return prediction def predict_spec(self, obs, **extras): @@ -438,12 +440,17 @@ def cache_eline_parameters(self, obs, nsigma=5, forcelines=False): self._fix_eline_pixelmask = np.array([], dtype=bool) return - # observed linewidths + # linewidths nline = self._ewave_obs.shape[0] + # physical linewidths self._eline_sigma_kms = np.atleast_1d(self.params.get('eline_sigma', 100.0)) # what is this wierd construction for? self._eline_sigma_kms = (self._eline_sigma_kms[None] * np.ones(nline)).squeeze() #self._eline_sigma_lambda = eline_sigma_kms * self._ewave_obs / ckms + # instrumental linewidths + if obs.resolution is not None: + sigma_inst = np.interp(self._ewave_obs, obs.wavelength, obs.resolution) + self._eline_sigma_kms = np.hypot(self._eline_sigma_kms, sigma_inst) # --- get valid lines --- # fixed and fit lines specified by user, but remove any lines which do diff --git a/prospect/observation/__init__.py b/prospect/observation/__init__.py index cc72dfc9..c130e1ef 100644 --- a/prospect/observation/__init__.py +++ b/prospect/observation/__init__.py @@ -2,5 +2,5 @@ from .observation import Photometry, Spectrum, Lines, from_oldstyle -__all__ = ["Photometry", "Spectrum", "Lines", +__all__ = ["Observation", "Photometry", "Spectrum", "Lines", "from_oldstyle"] diff --git a/prospect/sources/galaxy_basis.py b/prospect/sources/galaxy_basis.py index da616086..fb967446 100644 --- a/prospect/sources/galaxy_basis.py +++ 
b/prospect/sources/galaxy_basis.py @@ -161,6 +161,10 @@ def logage(self): def wavelengths(self): return self.ssp.wavelengths.copy() + @property + def spectral_resolution(self): + r = getattr(self.ssp, "resolutions", np.array(0)) + return r class FastStepBasis(SSPBasis): """Subclass of :py:class:`SSPBasis` that implements a "nonparameteric" From a252c46bd473c87f3a100602f2fa23adfb90f4c4 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sun, 6 Aug 2023 03:12:37 -0400 Subject: [PATCH 25/33] handle spectrum padding when no resoltion or wavelength; actually test in lnlike_testing --- prospect/observation/observation.py | 25 +++++++++++++++++-------- tests/test_predict.py | 22 +++++++++++++++++----- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 26a4bc98..676ee4f8 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -293,14 +293,17 @@ def __init__(self, self.pad_wavelength_array() def pad_wavelength_array(self, lambda_pad=100): + if self.wavelength is None: + return #wave_min = self.wave_min * (1 - np.arange(npad, 0, -1) * Kdelta[0] / ckms) low_pad = np.arange(lambda_pad, 1, (self.wavelength[0]-self.wavelength[1])) hi_pad = np.arange(1, lambda_pad, (self.wavelength[-1]-self.wavelength[-2])) wave_min = self.wave_min - low_pad wave_max = self.wave_max + hi_pad self.padded_wavelength = np.concatenate([wave_min, self.wavelength, wave_max]) - self.padded_resolution = np.interp(self.padded_wavelength, self.wavelength, self.resolution) self._unpadded_inds = slice(len(low_pad), -len(hi_pad)) + if self.resolution is not None: + self.padded_resolution = np.interp(self.padded_wavelength, self.wavelength, self.resolution) def smooth_lsf_fft(self, inwave, influx, outwave, sigma): dw = np.gradient(outwave) @@ -328,23 +331,29 @@ def instrumental_smoothing(self, wave_obs, influx, zred=0, libres=0): Parameters ---------- - obswave : ndarray of shape 
(N_pix_model,) - Observed frame wavelengths, in units of AA for the model + wave_obs : ndarray of shape (N_pix_model,) + Observed frame wavelengths, in units of AA for the *model* influx : ndarray of shape (N_pix_model,) - Flux array + Flux array corresponding to the observed frame wavelengths libres : float or ndarray of shape (N_pix_model,) - Library resolution in units of km/s (dispersion) to be subtracted from the smoothing kernel. - This should be in the observed frame and *on the same wavelength grid as obs.wavelength* + Library resolution in units of km/s (dispersion) to be subtracted + from the smoothing kernel. This should be in the observed frame and + on the same wavelength grid as obswave Returns ------- outflux : ndarray of shape (ndata,) If instrument resolution is not None, this is the smoothed flux on - the observed ``wavelength`` grid. If resolution is None, this just - passes ``influx`` right back again. + the observed ``wavelength`` grid. If wavelength is None, this just + passes ``influx`` right back again. 
If ``resolution`` is None then + ``influx`` is simply interpolated onto the wavelength grid """ + if self.wavelength is None: + return influx + if self.resolution is None: + return np.interp(self.wavelength, wave_obs, influx) # interpolate library resolution onto the instrumental wavelength grid Klib = np.interp(self.padded_wavelength, wave_obs, libres) # quadrature difference of instrumental and library reolution diff --git a/tests/test_predict.py b/tests/test_predict.py index e2323cfd..b0a5eae1 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -97,9 +97,21 @@ def lnlike_testing(build_sps): from prospect.likelihood.likelihood import compute_lnlike from prospect.fitting import lnprobfn - lnp = lnprobfn(model.theta, model=model, observations=obslist, sps=sps) - #%timeit model.prior_product(model.theta) - #%timeit predictions, x = model.predict(model.theta + np.random.uniform(0, 3) * arr, observations=obslist, sps=sps) - #%timeit lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs in zip(predictions, observations)] - #%timeit lnp = lnprobfn(model.theta + np.random.uniform(0, 3) * arr, model=model, observations=obslist, sps=sps) + predictions, x = model.predict(model.theta, observations, sps=sps) + lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs + in zip(predictions, observations)] + assert np.all([np.isscalar(p) for p in lnp_data]) + assert len(lnp_data) == len(observations) + + lnp = lnprobfn(model.theta, model=model, observations=observations, sps=sps) + + assert np.isscalar(lnp) + + # %timeit model.prior_product(model.theta) + # arr = np.zeros(model.ndim) + # arr[-1] = 1 + # theta = model.theta.copy() + # %timeit predictions, x = model.predict(theta + np.random.uniform(-0.1, 0.1) * arr, observations=observations, sps=sps) + # %timeit lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs in zip(predictions, observations)] + # %timeit lnp = lnprobfn(theta + np.random.uniform(0, 3) * arr, model=model, 
observations=observations, sps=sps) From 31ad26cdd5218ae029bbd547d46825a12d9aa961 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sun, 6 Aug 2023 13:52:19 -0400 Subject: [PATCH 26/33] return scalar from NoiseModel.lnlike when using outlier model. --- prospect/likelihood/noise_model.py | 2 +- prospect/observation/observation.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index f1585ce6..db0cd480 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -51,7 +51,7 @@ def lnlike(self, pred, obs, vectors={}): var_bad = var * (self.n_sigma_outlier**2) lnp_bad = -0.5*((delta**2 / var_bad) + np.log(2*np.pi*var_bad)) lnp_tot = np.logaddexp(lnp + np.log(1 - self.f_outlier), lnp_bad + np.log(self.f_outlier)) - return lnp_tot + return np.sum(lnp_tot) else: raise ValueError("f_outlier must be >= 0") diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 676ee4f8..8089eb06 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -293,6 +293,9 @@ def __init__(self, self.pad_wavelength_array() def pad_wavelength_array(self, lambda_pad=100): + """Pad the wavelength and, if present, resolution arrays so that FFTs + can be used on the models without edge effects. 
+ """ if self.wavelength is None: return #wave_min = self.wave_min * (1 - np.arange(npad, 0, -1) * Kdelta[0] / ckms) @@ -356,10 +359,10 @@ def instrumental_smoothing(self, wave_obs, influx, zred=0, libres=0): return np.interp(self.wavelength, wave_obs, influx) # interpolate library resolution onto the instrumental wavelength grid Klib = np.interp(self.padded_wavelength, wave_obs, libres) - # quadrature difference of instrumental and library reolution + # quadrature difference of instrumental and library resolution + assert np.all(self.padded_resolution >= Klib), "data higher resolution than library" Kdelta = np.sqrt(self.padded_resolution**2 - Klib**2) Kdelta_lambda = Kdelta / CKMS * self.padded_wavelength - outspec_padded = self.smooth_lsf_fft(wave_obs, influx, self.padded_wavelength, From e4c5436079db77e93e3ccbfae230165e8a9d6718 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sun, 6 Aug 2023 14:52:37 -0400 Subject: [PATCH 27/33] add some noise model parameter docs. --- prospect/likelihood/noise_model.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/prospect/likelihood/noise_model.py b/prospect/likelihood/noise_model.py index db0cd480..1fe0a044 100644 --- a/prospect/likelihood/noise_model.py +++ b/prospect/likelihood/noise_model.py @@ -25,6 +25,11 @@ def __init__(self, frac_out_name="f_outlier", self.nsigma_out_name = nsigma_out_name self.kernels = [] + def _available_parameters(self): + new_pars = [(self.frac_out_name, "Fraction of data points that are outliers"), + (self.nsigma_out_name, "Dispersion of the outlier distribution, in units of chi")] + return new_pars + def update(self, **params): self.f_outlier = params.get(self.frac_out_name, 0) self.n_sigma_outlier = params.get(self.nsigma_out_name, 50) @@ -100,6 +105,13 @@ def __init__(self, frac_out_name="f_outlier", self.metric_name = metric_name self.mask_name = mask_name + def _available_parameters(self): + new_pars = [(self.frac_out_name, "Fraction of data points that are outliers"), 
+ (self.nsigma_out_name, "Dispersion of the outlier distribution, in units of chi")] + for kernel in self.kernels: + new_pars += getattr(kernel, "_available_parameters", []) + return new_pars + def construct_covariance(self, **vectors): """Construct a covariance matrix from a metric, a list of kernel objects, and a list of weight vectors (of same length as the metric) From 10232a4606b4c07bf9ddc4f3ed1b3956472e38c5 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Sat, 26 Aug 2023 16:30:55 -0400 Subject: [PATCH 28/33] fix Observation class attributes for H5 storage. --- prospect/__init__.py | 1 + prospect/io/read_results.py | 4 +- prospect/models/sedmodel.py | 2 +- prospect/observation/__init__.py | 5 +- prospect/observation/observation.py | 116 +++++++++++++++------------- 5 files changed, 71 insertions(+), 57 deletions(-) diff --git a/prospect/__init__.py b/prospect/__init__.py index f1213763..338b909b 100644 --- a/prospect/__init__.py +++ b/prospect/__init__.py @@ -4,6 +4,7 @@ pass from . import models +from . import observation from . import fitting from . import io from . 
import sources diff --git a/prospect/io/read_results.py b/prospect/io/read_results.py index 40d9d789..cff75423 100644 --- a/prospect/io/read_results.py +++ b/prospect/io/read_results.py @@ -183,17 +183,17 @@ def read_hdf5(filename, **extras): res.update(groups['sampling']) res["bestfit"] = groups["bestfit"] res["optimization"] = groups["optimization"] + # do observations if 'observations' in hf: obs = obs_from_h5(hf['observations']) else: obs = None - #res['obs'] = obs return res, obs def obs_from_h5(obsgroup): - from ..data.observation import from_serial + from ..observation import from_serial observations = [] for obsname, dset in obsgroup.items(): arr, meta = dset[:], dict(dset.attrs) diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index e36a20af..4e1625f6 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -578,7 +578,7 @@ def fit_mle_elines(self, obs, calibrated_spec, sigma_spec=None): # Cache the ln-penalty # FIXME this needs to be acumulated if there are multiple spectra - self._ln_eline_penalty = K + self._ln_eline_penalty += K # Store fitted emission line luminosities in physical units self._eline_lum[idx] = alpha_bar / linecal diff --git a/prospect/observation/__init__.py b/prospect/observation/__init__.py index c130e1ef..ebf9c627 100644 --- a/prospect/observation/__init__.py +++ b/prospect/observation/__init__.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- -from .observation import Photometry, Spectrum, Lines, from_oldstyle +from .observation import Photometry, Spectrum, Lines +from .observation import from_oldstyle, from_serial __all__ = ["Observation", "Photometry", "Spectrum", "Lines", - "from_oldstyle"] + "from_oldstyle", "from_serial"] diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 8089eb06..8cce6727 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -39,8 +39,8 @@ class Observation: logify_spectrum = False alias = {} 
- _meta = ["kind", "name"] - _data = ["wavelength", "flux", "uncertainty", "mask"] + _meta = ("kind", "name") + _data = ("wavelength", "flux", "uncertainty", "mask") def __init__(self, flux=None, @@ -147,14 +147,23 @@ def to_struct(self, data_dtype=np.float32): """Convert data to a structured array """ self._automask() - dtype = np.dtype([(c, data_dtype) for c in self._data]) - struct = np.zeros(self.ndata, dtype=dtype) + cols = [] for c in self._data: + dat = getattr(self, c) + if (dat is None): + continue + if (len(dat) != self.ndata): + continue + #raise ValueError(f"The {c} attribute of the {self.name} observation has the wrong length ({len(dat)} instead of {self.ndata})") + cols += [(c, dat.dtype)] + dtype = np.dtype(cols) + struct = np.zeros(self.ndata, dtype=dtype) + for c in dtype.names: data = getattr(self, c) - try: + if c is not None: struct[c] = data - except(ValueError): - pass + #except(ValueError): + # pass return struct def to_fits(self, filename=""): @@ -176,8 +185,16 @@ def to_json(self): serial = json.dumps(obs, cls=NumpyEncoder) return serial + def to_oldstyle(self): + obs = {} + obs.update(vars(self)) + for k, v in self.alias.items(): + obs[k] = self[v] + _ = obs.pop(v) + return obs + @property - def to_nJy(self): + def maggies_to_nJy(self): return 1e9 * 3631 @@ -188,7 +205,7 @@ class Photometry(Observation): maggies_unc="uncertainty", filters="filters", phot_mask="mask") - _meta = ["kind", "name", "filternames"] + _meta = ("kind", "name", "filternames") def __init__(self, filters=[], name="PhotA", **kwargs): """On Observation object that holds photometric data @@ -211,6 +228,8 @@ def __init__(self, filters=[], name="PhotA", **kwargs): super(Photometry, self).__init__(name=name, **kwargs) def set_filters(self, filters): + + # TODO: Make this less convoluted if (len(filters) == 0) or (filters is None): self.filters = filters self.filternames = [] @@ -235,13 +254,7 @@ def wavelength(self): return np.array([f.wave_effective for f in self.filters]) 
def to_oldstyle(self): - obs = {} - obs.update(vars(self)) - for k, v in self.alias.items(): - obs[k] = self[v] - _ = obs.pop(v) - #obs.update({k: self[v] for k, v in self.alias.items()}) - #_ = [obs.pop(k) for k in ["flux", "uncertainty", "mask"]] + obs = super(Photometry, self).to_oldstyle() obs["phot_wave"] = self.wavelength return obs @@ -254,14 +267,16 @@ class Spectrum(Observation): wavelength="wavelength", mask="mask") - data = ["wavelength", "flux", "uncertainty", "mask", - "resolution", "calibration"] + _meta = ("kind", "name", "lambda_pad") + _data = ("wavelength", "flux", "uncertainty", "mask", + "resolution", "calibration") def __init__(self, wavelength=None, resolution=None, calibration=None, name="SpecA", + lambda_pad=100, **kwargs): """ @@ -290,17 +305,18 @@ def __init__(self, self.calibration = calibration self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) assert np.all(np.diff(self.wavelength) > 0) + self.lambda_pad = lambda_pad self.pad_wavelength_array() - def pad_wavelength_array(self, lambda_pad=100): + def pad_wavelength_array(self): """Pad the wavelength and, if present, resolution arrays so that FFTs can be used on the models without edge effects. 
""" if self.wavelength is None: return - #wave_min = self.wave_min * (1 - np.arange(npad, 0, -1) * Kdelta[0] / ckms) - low_pad = np.arange(lambda_pad, 1, (self.wavelength[0]-self.wavelength[1])) - hi_pad = np.arange(1, lambda_pad, (self.wavelength[-1]-self.wavelength[-2])) + + low_pad = np.arange(self.lambda_pad, 1, (self.wavelength[0]-self.wavelength[1])) + hi_pad = np.arange(1, self.lambda_pad, (self.wavelength[-1]-self.wavelength[-2])) wave_min = self.wave_min - low_pad wave_max = self.wave_max + hi_pad self.padded_wavelength = np.concatenate([wave_min, self.wavelength, wave_max]) @@ -308,7 +324,7 @@ def pad_wavelength_array(self, lambda_pad=100): if self.resolution is not None: self.padded_resolution = np.interp(self.padded_wavelength, self.wavelength, self.resolution) - def smooth_lsf_fft(self, inwave, influx, outwave, sigma): + def _smooth_lsf_fft(self, inwave, influx, outwave, sigma): dw = np.gradient(outwave) sigma_per_pixel = (dw / sigma) cdf = np.cumsum(sigma_per_pixel) @@ -357,35 +373,22 @@ def instrumental_smoothing(self, wave_obs, influx, zred=0, libres=0): return influx if self.resolution is None: return np.interp(self.wavelength, wave_obs, influx) + # interpolate library resolution onto the instrumental wavelength grid Klib = np.interp(self.padded_wavelength, wave_obs, libres) - # quadrature difference of instrumental and library resolution assert np.all(self.padded_resolution >= Klib), "data higher resolution than library" + + # quadrature difference of instrumental and library resolution Kdelta = np.sqrt(self.padded_resolution**2 - Klib**2) Kdelta_lambda = Kdelta / CKMS * self.padded_wavelength - outspec_padded = self.smooth_lsf_fft(wave_obs, - influx, - self.padded_wavelength, - Kdelta_lambda) - if False: - warr = [wave_min] - while warr[-1] < wave_max: - w = warr[-1] - dv = np.interp(w, self.wavelength, Kdelta) - warr.append((1 + dv / ckms) * w) - warr = np.array(warr) - flux_resampled = np.interp(warr, wave_obs, influx) - 
np.convolve(flux_resampled, ) - return outspec_padded[self._unpadded_inds] + # Smooth by the difference kernel + outspec_padded = self._smooth_lsf_fft(wave_obs, + influx, + self.padded_wavelength, + Kdelta_lambda) - def to_oldstyle(self): - obs = {} - obs.update(vars(self)) - for k, v in self.alias.items(): - obs[k] = self[v] - _ = obs.pop(v) - return obs + return outspec_padded[self._unpadded_inds] class Lines(Spectrum): @@ -397,8 +400,9 @@ class Lines(Spectrum): mask="mask", line_inds="line_ind") - data = ["wavelength", "flux", "uncertainty", "mask", - "resolution", "calibration", "line_ind"] + _meta = ("name", "kind") + _data = ("wavelength", "flux", "uncertainty", "mask", + "resolution", "calibration", "line_ind") def __init__(self, line_ind=None, @@ -428,7 +432,7 @@ def __init__(self, :param calibration: not sure yet .... """ - super(Lines, self).__init__(name=name, **kwargs) + super(Lines, self).__init__(name=name, resolution=None, **kwargs) assert (line_ind is not None), "You must identify the lines by their index in the FSPS emission line array" self.line_ind = np.array(line_ind).as_type(int) @@ -449,11 +453,19 @@ def from_oldstyle(obs, **kwargs): def from_serial(arr, meta): + kind = obstypes[meta.pop("kind")] + adict = {a:arr[a] for a in arr.dtype.names} - adict["name"] = meta.get("name", "") + adict["name"] = meta.pop("name", "") if 'filters' in meta: - adict["filters"] = meta["filters"].split(",") - obs = obstypes[meta["kind"]](**adict) - #[setattr(obs, m, v) for m, v in meta.items()] + adict["filters"] = meta.pop("filters").split(",") + + obs = kind(**adict) + + # set other metadata as attributes? No, needs to be during instantiation + #for k, v in meta.items(): + # if k in kind._meta: + # setattr(obs, k, v) + return obs From 2a7c5fd4c2253cb9560c374bfd09c2b4c0b9671f Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 18 Sep 2023 07:40:37 -0400 Subject: [PATCH 29/33] default Observation names now include a the hex id to distinguish instances. 
--- prospect/observation/observation.py | 31 ++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 8cce6727..358e180c 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -4,7 +4,7 @@ import numpy as np from sedpy.observate import FilterSet -from sedpy.smoothing import smoothspec, smooth_fft +from sedpy.smoothing import smooth_fft from ..likelihood.noise_model import NoiseModel @@ -15,6 +15,7 @@ CKMS = 2.998e5 + class NumpyEncoder(json.JSONEncoder): def default(self, obj): @@ -37,6 +38,7 @@ class Observation: noise : """ + kind = "observation" logify_spectrum = False alias = {} _meta = ("kind", "name") @@ -47,7 +49,7 @@ def __init__(self, uncertainty=None, mask=slice(None), noise=NoiseModel(), - name="ObsA", + name=None, **kwargs ): @@ -55,8 +57,15 @@ def __init__(self, self.uncertainty = np.array(uncertainty) self.mask = mask self.noise = noise - self.name = name self.from_oldstyle(**kwargs) + if name is None: + addr = f"{id(self):04x}" + self.name = f"{self.kind[:5]}-{addr[:6]}" + else: + self.name = name + + def __str__(self): + return f"{self.kind} ({self.name})" def __getitem__(self, item): """Dict-like interface for backwards compatibility @@ -207,7 +216,9 @@ class Photometry(Observation): phot_mask="mask") _meta = ("kind", "name", "filternames") - def __init__(self, filters=[], name="PhotA", **kwargs): + def __init__(self, filters=[], + name=None, + **kwargs): """On Observation object that holds photometric data Parameters @@ -275,7 +286,7 @@ def __init__(self, wavelength=None, resolution=None, calibration=None, - name="SpecA", + name=None, lambda_pad=100, **kwargs): @@ -304,8 +315,14 @@ def __init__(self, self.resolution = resolution self.calibration = calibration self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) - assert np.all(np.diff(self.wavelength) > 0) self.lambda_pad 
= lambda_pad + if self.wavelength is not None: + self.set_wavelength(self.wavelength) + + # TODO make this a proper settr/gettr for wavelenth attribute + def set_wavelength(self, wavelength): + self.wavelength = wavelength + assert np.all(np.diff(self.wavelength) > 0) self.pad_wavelength_array() def pad_wavelength_array(self): @@ -406,7 +423,7 @@ class Lines(Spectrum): def __init__(self, line_ind=None, - name="SpecA", + name=None, **kwargs): """ From ede002ad20a8789fcce99eb9471f00a637614023 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 18 Sep 2023 08:11:38 -0400 Subject: [PATCH 30/33] begin work on undersampledSpectrum. --- prospect/observation/observation.py | 50 +++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/prospect/observation/observation.py b/prospect/observation/observation.py index 358e180c..37831d8e 100644 --- a/prospect/observation/observation.py +++ b/prospect/observation/observation.py @@ -5,6 +5,7 @@ from sedpy.observate import FilterSet from sedpy.smoothing import smooth_fft +from sedpy.observate import rebin from ..likelihood.noise_model import NoiseModel @@ -311,19 +312,22 @@ def __init__(self, not sure yet .... 
""" super(Spectrum, self).__init__(name=name, **kwargs) - self.wavelength = wavelength + self.lambda_pad = lambda_pad self.resolution = resolution self.calibration = calibration self.instrument_smoothing_parameters = dict(smoothtype="vel", fftsmooth=True) - self.lambda_pad = lambda_pad - if self.wavelength is not None: - self.set_wavelength(self.wavelength) - - # TODO make this a proper settr/gettr for wavelenth attribute - def set_wavelength(self, wavelength): self.wavelength = wavelength - assert np.all(np.diff(self.wavelength) > 0) - self.pad_wavelength_array() + + @property + def wavelength(self): + return self._wavelength + + @wavelength.setter + def wavelength(self, wave): + self._wavelength = wave + if self._wavelength is not None: + assert np.all(np.diff(self._wavelength) > 0) + self.pad_wavelength_array() def pad_wavelength_array(self): """Pad the wavelength and, if present, resolution arrays so that FFTs @@ -408,6 +412,34 @@ def instrumental_smoothing(self, wave_obs, influx, zred=0, libres=0): return outspec_padded[self._unpadded_inds] +class UndersampledSpectrum(Spectrum): + + def _smooth_lsf_fft(self, inwave, influx, outwave, sigma): + raise NotImplementedError + # TODO does this need to be changed if outwave is undersampled? + # TODO testing + dw = np.gradient(outwave) + sigma_per_pixel = (dw / sigma) + cdf = np.cumsum(sigma_per_pixel) + cdf /= cdf.max() + # check: do we need this? + x_per_pixel = np.gradient(cdf) + x_per_sigma = np.nanmedian(x_per_pixel / sigma_per_pixel) + pix_per_sigma = 1 + N = pix_per_sigma / x_per_sigma + nx = int(2**np.ceil(np.log2(N))) + # now evenly sample in the x coordinate + x = np.linspace(0, 1, nx) + dx = 1.0 / nx + # convert x to wave + lam = np.interp(x, cdf, outwave) + newflux = np.interp(lam, inwave, influx) + flux_conv = smooth_fft(dx, newflux, x_per_sigma) + # TODO - does this do the right thing regarding edge/center of pixels? 
+ outflux = rebin(outwave, lam, flux_conv) + return outflux + + class Lines(Spectrum): kind = "lines" From c15ccc2f47fd76e34228d58767d9846e752e47aa Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Wed, 3 Jan 2024 13:59:28 -0500 Subject: [PATCH 31/33] fix bug in eline pixelmask generation. --- prospect/models/sedmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prospect/models/sedmodel.py b/prospect/models/sedmodel.py index 4e1625f6..76219e90 100644 --- a/prospect/models/sedmodel.py +++ b/prospect/models/sedmodel.py @@ -458,7 +458,7 @@ def cache_eline_parameters(self, obs, nsigma=5, forcelines=False): # This part has to go in every call linewidth = nsigma * self._ewave_obs / ckms * self._eline_sigma_kms pixel_mask = (np.abs(self._outwave - self._ewave_obs[:, None]) < linewidth[:, None]) - pixel_mask = pixel_mask & obs.get("mask")[None, :] + pixel_mask = pixel_mask & obs.get("mask", np.ones_like(self._outwave))[None, :] self._valid_eline = pixel_mask.any(axis=1) & self._use_eline # --- wavelengths corresponding to valid lines --- From b55325ddb59502c4019ce1b42ee759243c2a4f9a Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Thu, 4 Jan 2024 10:08:12 -0500 Subject: [PATCH 32/33] rectify obs before prediction in agn test. --- tests/test_agn_eline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_agn_eline.py b/tests/test_agn_eline.py index 1cd5a61e..a1090bc1 100644 --- a/tests/test_agn_eline.py +++ b/tests/test_agn_eline.py @@ -25,6 +25,7 @@ def test_agn_elines(): flux=np.ones(1000), uncertainty=np.ones(1000)*0.1) obs = [spec, phot] + [ob.rectify() for ob in obs] # --- model --- model_pars = TemplateLibrary["parametric_sfh"] From 1154ca74fb41208f7466e592c8681cd8c65a3187 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Mon, 8 Jan 2024 13:21:55 -0500 Subject: [PATCH 33/33] fix tests; start on multispec tests. 
--- tests/test_eline.py | 34 ++++++-------- tests/test_lnlike.py | 90 +++++++++++++++++++++++++++++++++++ tests/test_multispec.py | 102 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 19 deletions(-) create mode 100644 tests/test_lnlike.py create mode 100644 tests/test_multispec.py diff --git a/tests/test_eline.py b/tests/test_eline.py index 293d7e70..7330c425 100644 --- a/tests/test_eline.py +++ b/tests/test_eline.py @@ -59,16 +59,23 @@ def test_eline_parsing(): assert model._fit_eline.sum() == (len(model._use_eline) - len(fix_lines)) -def test_nebline_phot_addition(get_sps): - fnames = [f"sdss_{b}0" for b in "ugriz"] - filts = observate.load_filters(fnames) - +def build_obs(filts): obs = dict(filters=filts, wavelength=np.linspace(3000, 9000, 1000), spectrum=np.ones(1000), - unc=np.ones(1000)*0.1) + unc=np.ones(1000)*0.1, + maggies=np.ones(len(filts))*1e-7, + maggies_unc=np.ones(len(filts))*1e-8) sdat, pdat = from_oldstyle(obs) obslist = [sdat, pdat] + [obs.rectify() for obs in obslist] + return obslist + + +def test_nebline_phot_addition(get_sps): + fnames = [f"sdss_{b}0" for b in "ugriz"] + filts = observate.load_filters(fnames) + obslist = build_obs(filts) sps = get_sps @@ -104,13 +111,8 @@ def test_filtersets(get_sps): """ fnames = [f"sdss_{b}0" for b in "ugriz"] flist = observate.load_filters(fnames) - - obs = dict(wavelength=np.linspace(3000, 9000, 1000), - spectrum=np.ones(1000), - unc=np.ones(1000)*0.1, - filters=fnames) - sdat, pdat = from_oldstyle(obs) - obslist = [sdat, pdat] + obslist = build_obs(flist) + sdat, pdat = obslist sps = get_sps @@ -149,13 +151,7 @@ def test_eline_implementation(get_sps): test_eline_parsing() filters = observate.load_filters([f"sdss_{b}0" for b in "ugriz"]) - obs = dict(filters=filters, - wavelength=np.linspace(3000, 9000, 1000), - spectrum=np.ones(1000), - unc=np.ones(1000)*0.1, - maggies=np.ones(len(filters))*1e-7, - maggies_unc=np.ones(len(filters))*1e-8) - obslist = from_oldstyle(obs) + 
obslist = build_obs(filters) model_pars = TemplateLibrary["parametric_sfh"] model_pars.update(TemplateLibrary["nebular"]) diff --git a/tests/test_lnlike.py b/tests/test_lnlike.py new file mode 100644 index 00000000..a1f4209c --- /dev/null +++ b/tests/test_lnlike.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import numpy as np + +import pytest + +from prospect.sources import CSPSpecBasis +from prospect.models import SpecModel, templates +from prospect.observation import Spectrum, Photometry +from prospect.likelihood import NoiseModel +from prospect.likelihood.likelihood import compute_lnlike +from prospect.fitting import lnprobfn + + +@pytest.fixture +def get_sps(): + sps = CSPSpecBasis(zcontinuous=1) + return sps + + +def build_model(add_neb=False, add_outlier=False): + model_params = templates.TemplateLibrary["parametric_sfh"] + if add_neb: + model_params.update(templates.TemplateLibrary["nebular"]) + if add_outlier: + model_params.update(templates.TemplateLibrary["outlier_model"]) + model_params["f_outlier_phot"]["isfree"] = True + model_params["f_outlier_phot"]["init"] = 0.05 + return SpecModel(model_params) + + +def build_obs(multispec=True, add_outlier=True): + N = 1500 * (2 - multispec) + wmax = 7000 + wsplit = wmax - N * multispec + + fnames = list([f"sdss_{b}0" for b in "ugriz"]) + Nf = len(fnames) + phot = [Photometry(filters=fnames, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)] + spec = [Spectrum(wavelength=np.linspace(4000, wsplit, N), + flux=np.ones(N), uncertainty=np.ones(N) / 10, + mask=slice(None))] + + if add_outlier: + phot[0].noise = NoiseModel(frac_out_name='f_outlier_phot', + nsigma_out_name='nsigma_outlier_phot') + + if multispec: + spec += [Spectrum(wavelength=np.linspace(wsplit+1, wmax, N), + flux=np.ones(N), uncertainty=np.ones(N) / 10, + mask=slice(None))] + + obslist = spec + phot + [obs.rectify() for obs in obslist] + return obslist + + +def test_lnlike_shape(get_sps): + # testing lnprobfn + sps = 
get_sps + + for add_out in [True, False]: + observations = build_obs(add_outlier=add_out) + model = build_model(add_neb=add_out, add_outlier=add_out) + + model.set_parameters(model.theta) + [obs.noise.update(**model.params) for obs in observations + if obs.noise is not None] + predictions, x = model.predict(model.theta, observations, sps=sps) + + # check you get a scalar lnp for each observation + lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs + in zip(predictions, observations)] + assert np.all([np.isscalar(p) for p in lnp_data]), f"failed for add_outlier={add_out}" + assert len(lnp_data) == len(observations), f"failed for add_outlier={add_out}" + + # check lnprobfn returns scalar + lnp = lnprobfn(model.theta, model=model, observations=observations, sps=sps) + + assert np.isscalar(lnp), f"failed for add_outlier={add_out}" + + # %timeit model.prior_product(model.theta) + # arr = np.zeros(model.ndim) + # arr[-1] = 1 + # theta = model.theta.copy() + # %timeit predictions, x = model.predict(theta + np.random.uniform(-0.1, 0.1) * arr, observations=observations, sps=sps) + # %timeit lnp_data = [compute_lnlike(pred, obs, vectors={}) for pred, obs in zip(predictions, observations)] + # %timeit lnp = lnprobfn(theta + np.random.uniform(0, 3) * arr, model=model, observations=observations, sps=sps) diff --git a/tests/test_multispec.py b/tests/test_multispec.py new file mode 100644 index 00000000..3d280eb5 --- /dev/null +++ b/tests/test_multispec.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import numpy as np + +import pytest + +from sedpy.observate import load_filters +from prospect.sources import CSPSpecBasis +from prospect.models import SpecModel, templates +from prospect.observation import Spectrum, Photometry + + +@pytest.fixture(scope="module") +def build_sps(): + sps = CSPSpecBasis(zcontinuous=1) + return sps + + +def build_model(add_neb=False): + model_params = templates.TemplateLibrary["parametric_sfh"] + if 
add_neb: + model_params.update(templates.TemplateLibrary["nebular"]) + return SpecModel(model_params) + + +def build_obs(multispec=True): + N = 1500 * (2 - multispec) + wmax = 7000 + wsplit = wmax - N * multispec + + fnames = list([f"sdss_{b}0" for b in "ugriz"]) + Nf = len(fnames) + phot = [Photometry(filters=fnames, flux=np.ones(Nf), uncertainty=np.ones(Nf)/10)] + spec = [Spectrum(wavelength=np.linspace(4000, wsplit, N), + flux=np.ones(N), uncertainty=np.ones(N) / 10, + mask=slice(None))] + + if multispec: + spec += [Spectrum(wavelength=np.linspace(wsplit+1, wmax, N), + flux=np.ones(N), uncertainty=np.ones(N) / 10, + mask=slice(None))] + + obslist = spec + phot + [obs.rectify() for obs in obslist] + return obslist + + +def test_prediction_nodata(build_sps): + sps = build_sps + model = build_model(add_neb=True) + sobs, pobs = build_obs(multispec=False) + pobs.flux = None + pobs.uncertainty = None + sobs.wavelength = None + sobs.flux = None + sobs.uncertainty = None + pred, mfrac = model.predict(model.theta, observations=[sobs, pobs], sps=sps) + assert len(pred[0]) == len(sps.wavelengths) + assert len(pred[1]) == len(pobs.filterset) + + +def test_multispec(build_sps): + sps = build_sps + + obslist_single = build_obs(multispec=False) + obslist_multi = build_obs(multispec=True) + model = build_model(add_neb=True) + + preds_single, mfrac = model.predict(model.theta, observations=obslist_single, sps=sps) + preds_multi, mfrac = model.predict(model.theta, observations=obslist_multi, sps=sps) + + assert len(preds_single) == 2 + assert len(preds_multi) == 3 + assert np.allclose(preds_single[-1], preds_multi[-1]) + + # TODO: turn this plot into an actual test + #import matplotlib.pyplot as pl + #fig, ax = pl.subplots() + #ax.plot(obslist_single[0].wavelength, predictions_single[0]) + #for p, o in zip(predictions, obslist): + # if o.kind == "photometry": + # ax.plot(o.wavelength, p, "o") + # else: + # ax.plot(o.wavelength, p) + + +def test_multires(): + # Test the smoothing 
of multiple spectra to different resolutions + # - give the same wavelength array different instrumental resolutions, assert similar but different, and that smoothing by the difference gives the right answer + # Test the use of two different smoothings, physical and instrumental + # - give an obs with no instrument smoothing and one with, make sure they are different + pass + + +def test_multinoise(): + pass + + +def test_multical(): + pass \ No newline at end of file