Merge branch 'master' into gls-chisq

abhisrkckl · web-flow · commit 4e03b95a3129 · 2023-08-17T15:54:30.000-05:00
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -20,7 +20,7 @@ Active developers are indicated by (*). Authors of the PINT paper are indicated
 * Anne Archibald (#*)
 * Matteo Bachetti (#)
 * Bastian Beischer 
-* Deven Bhakta 
+* Deven Bhakta (*)
 * Chloe Champagne (#)
 * Jonathan Colen (#)
 * Thankful Cromartie 
diff --git a/CHANGELOG-unreleased.md b/CHANGELOG-unreleased.md
@@ -14,10 +14,12 @@ the released changes.
 - Updated `CONTRIBUTING.rst` with the latest information.
 - Moved design matrix normalization code from `pint.fitter` to the new `pint.utils.normalize_designmatrix()` function.
 - Made `Residuals` independent of `GLSFitter` (GLS chi2 is now computed using the new function `Residuals._calc_gls_chi2()`).
+- Made `TimingModel.params` and `TimingModel.params_ordered` identical. Deprecated `TimingModel.params_ordered`.
 ### Added
 - Third-order Roemer delay terms to ELL1 model
 - Options to add a TZR TOA (`AbsPhase`) during the creation of a `TimingModel` using `ModelBuilder.__call__`, `get_model`, and `get_model_and_toas`
 - `pint.print_info()` function for bug reporting
+- Added an autocorrelation function to check for chain convergence in `event_optimize`
 ### Fixed
 - Deleting JUMP1 from flag tables will not prevent fitting
 - Simulating TOAs from tim file when PLANET_SHAPIRO is true now works
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@ the unreleased changes. This file should only be changed while tagging a new ver
 - Unreleased CHANGELOG entries should now be entered in `CHANGELOG-unreleased.md` instead of `CHANGELOG.md`. Updated documentation accordingly.
 - Changed tests to remove `unittest` and use pure pytest format
 - Changed deprecated `sampler.chain` usage
+- Download data automatically in the profiling script `high_level_benchmark.py` instead of silently giving wrong results.
 ### Added
 - `SpindownBase` as the abstract base class for `Spindown` and `PeriodSpindown` in the `How_to_build_a_timing_model_component.py` example.
 - `SolarWindDispersionBase` as the abstract base class for solar wind dispersion components.
diff --git a/profiling/.gitignore b/profiling/.gitignore
@@ -0,0 +1,2 @@
+J0740+6620.cfr+19.tim
+bench_*_summary
diff --git a/profiling/high_level_benchmark.py b/profiling/high_level_benchmark.py
@@ -16,6 +16,7 @@
 import sys
 import os
 import platform
+import urllib.request
 from prfparser import parse_file
 
 
@@ -67,11 +68,19 @@ def get_results(script, outfile):
     parser = argparse.ArgumentParser(
         description="High-level summary of python file timing."
     )
+
+    if not os.path.isfile("J0740+6620.cfr+19.tim"):
+        print("Downloading data file J0740+6620.cfr+19.tim ...")
+        urllib.request.urlretrieve(
+            "https://data.nanograv.org/static/data/J0740+6620.cfr+19.tim",
+            "J0740+6620.cfr+19.tim",
+        )
+
+    script1 = "bench_load_TOAs.py"
     script2 = "bench_chisq_grid.py"
     script3 = "bench_chisq_grid_WLSFitter.py"
     script4 = "bench_MCMC.py"
 
-    script1 = "bench_load_TOAs.py"
     # time scripts
     output1 = bench_file(script1)
     output2 = bench_file(script2)
diff --git a/src/pint/fitter.py b/src/pint/fitter.py
@@ -363,7 +363,7 @@ def get_summary(self, nodmx=False):
 
         # to handle all parameter names, determine the longest length for the first column
         longestName = 0  # optionally specify the minimum length here instead of 0
-        for pn in self.model.params_ordered:
+        for pn in self.model.params:
             if nodmx and pn.startswith("DMX"):
                 continue
             if len(pn) > longestName:
@@ -378,7 +378,7 @@ def get_summary(self, nodmx=False):
         s += ("{:<" + spacingName + "s} {:>20s} {:>28s} {}\n").format(
             "=" * longestName, "=" * 20, "=" * 28, "=" * 5
         )
-        for pn in self.model.params_ordered:
+        for pn in self.model.params:
             if nodmx and pn.startswith("DMX"):
                 continue
             prefitpar = getattr(self.model_init, pn)
diff --git a/src/pint/models/timing_model.py b/src/pint/models/timing_model.py
@@ -183,12 +183,11 @@ class TimingModel:
     removed with methods on this object, and for many of them additional
     parameters in families (``DMXEP_1234``) can be added.
 
-    Parameters in a TimingModel object are listed in the ``model.params`` and
-    ``model.params_ordered`` objects. Each Parameter can be set as free or
-    frozen using its ``.frozen`` attribute, and a list of the free parameters
-    is available through the ``model.free_params`` property; this can also
-    be used to set which parameters are free. Several methods are available
-    to get and set some or all parameters in the forms of dictionaries.
+    Parameters in a TimingModel object are listed in the ``model.params`` object.
+    Each Parameter can be set as free or frozen using its ``.frozen`` attribute,
+    and a list of the free parameters is available through the ``model.free_params``
+    property; this can also be used to set which parameters are free. Several methods
+    are available to get and set some or all parameters in the forms of dictionaries.
 
     TimingModel objects also support a number of functions for computing
     various things like orbital phase, and barycentric versions of TOAs,
@@ -500,20 +499,30 @@ def __getattr__(self, name):
         )
 
     @property_exists
-    def params(self):
-        """List of all parameter names in this model and all its components (order is arbitrary)."""
-        # FIXME: any reason not to just use params_ordered here?
-        p = self.top_level_params
-        for cp in self.components.values():
-            p = p + cp.params
-        return p
+    def params_ordered(self):
+        """List of all parameter names in this model and all its components.
+        This is the same as `params`."""
+
+        # Historically, this was different from `params` because Python
+        # dictionaries were unordered until Python 3.7. Now there is no reason for
+        # them to be different.
+
+        warn(
+            "`TimingModel.params_ordered` is now deprecated and may be removed in the future. "
+            "Use `TimingModel.params` instead. It gives the same output as `TimingModel.params_ordered`.",
+            DeprecationWarning,
+        )
+
+        return self.params
 
     @property_exists
-    def params_ordered(self):
+    def params(self):
         """List of all parameter names in this model and all its components, in a sensible order."""
+
         # Define the order of components in the list
         # Any not included will be printed between the first and last set.
         # FIXME: make order completely canonical (sort components by name?)
+
         start_order = ["astrometry", "spindown", "dispersion"]
         last_order = ["jump_delay"]
         compdict = self.get_components_by_category()
@@ -551,15 +560,15 @@ def params_ordered(self):
     def free_params(self):
         """List of all the free parameters in the timing model. Can be set to change which are free.
 
-        These are ordered as ``self.params_ordered`` does.
+        These are in the same order as ``self.params``.
 
         Upon setting, order does not matter, and aliases are accepted.
         ValueError is raised if a parameter is not recognized.
 
         On setting, parameter aliases are converted with
         :func:`pint.models.timing_model.TimingModel.match_param_aliases`.
         """
-        return [p for p in self.params_ordered if not getattr(self, p).frozen]
+        return [p for p in self.params if not getattr(self, p).frozen]
 
     @free_params.setter
     def free_params(self, params):
@@ -620,7 +629,7 @@ def get_params_dict(self, which="free", kind="quantity"):
         if which == "free":
             ps = self.free_params
         elif which == "all":
-            ps = self.params_ordered
+            ps = self.params
         else:
             raise ValueError("get_params_dict expects which to be 'all' or 'free'")
         c = OrderedDict()
@@ -2014,10 +2023,7 @@ def compare(
             log.debug("Check verbosity - only warnings/info will be displayed")
         othermodel = copy.deepcopy(othermodel)
 
-        if (
-            "POSEPOCH" in self.params_ordered
-            and "POSEPOCH" in othermodel.params_ordered
-        ):
+        if "POSEPOCH" in self.params and "POSEPOCH" in othermodel.params:
             if (
                 self.POSEPOCH.value is not None
                 and othermodel.POSEPOCH.value is not None
@@ -2028,7 +2034,7 @@ def compare(
                     % (other_model_name, model_name)
                 )
                 othermodel.change_posepoch(self.POSEPOCH.value)
-        if "PEPOCH" in self.params_ordered and "PEPOCH" in othermodel.params_ordered:
+        if "PEPOCH" in self.params and "PEPOCH" in othermodel.params:
             if (
                 self.PEPOCH.value is not None
                 and self.PEPOCH.value != othermodel.PEPOCH.value
@@ -2037,7 +2043,7 @@ def compare(
                     "Updating PEPOCH in %s to match %s" % (other_model_name, model_name)
                 )
                 othermodel.change_pepoch(self.PEPOCH.value)
-        if "DMEPOCH" in self.params_ordered and "DMEPOCH" in othermodel.params_ordered:
+        if "DMEPOCH" in self.params and "DMEPOCH" in othermodel.params:
             if (
                 self.DMEPOCH.value is not None
                 and self.DMEPOCH.value != othermodel.DMEPOCH.value
@@ -2072,7 +2078,7 @@ def compare(
                     f"{model_name} is in ECL({self.ECL.value}) coordinates but {other_model_name} is in ICRS coordinates and convertcoordinates=False"
                 )
 
-        for pn in self.params_ordered:
+        for pn in self.params:
             par = getattr(self, pn)
             if par.value is None:
                 continue
@@ -2299,8 +2305,8 @@ def compare(
                 )
 
         # Now print any parameters in othermodel that were missing in self.
-        mypn = self.params_ordered
-        for opn in othermodel.params_ordered:
+        mypn = self.params
+        for opn in othermodel.params:
             if opn in mypn and getattr(self, opn).value is not None:
                 continue
             if nodmx and opn.startswith("DMX"):
diff --git a/src/pint/scripts/event_optimize.py b/src/pint/scripts/event_optimize.py
@@ -247,6 +247,82 @@ def get_fit_keyvals(model, phs=0.0, phserr=0.1):
     return fitkeys, np.asarray(fitvals), np.asarray(fiterrs)
 
 
+def run_sampler_autocorr(sampler, pos, nsteps, burnin, csteps=100, crit1=10):
+    """Runs the sampler and checks for chain convergence. Returns the list of mean autocorrelation times, one per convergence check.
+    Parameters
+    ----------
+    sampler
+        The Emcee Ensemble Sampler
+    pos
+        The Initial positions of the walkers
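+    nsteps : int
+        The number of integration steps
+    burnin : int
+        The number of initial steps before convergence checks begin; the run is not stopped early until at least 1000 steps past this point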
+    csteps : int
+        The interval at which the autocorrelation time is computed.
+    crit1 : int
+        The ratio of chain length to autocorrelation time to satisfy convergence
+    Returns
+    -------
+    The list of mean autocorrelation times, one entry per convergence check (the sampler itself is not returned)
+    Note
+    ----
+    The function checks for convergence of the chains every specified number of steps.
+    The criteria to check for convergence is:
+        1. the chain has to be longer than the specified ratio times the estimated autocorrelation time
+        2. the change in the estimated autocorrelation time is less than 1%
+    """
+    autocorr = []
+    old_tau = np.inf
+    converged1 = False
+    converged2 = False
+    for sample in sampler.sample(pos, iterations=nsteps, progress=True):
+        if not converged1:
+            # Checks if the iteration is past the burnin and checks for convergence at 10% tau change
+            if sampler.iteration >= burnin and sampler.iteration % csteps == 0:
+                tau = sampler.get_autocorr_time(tol=0, quiet=True)
+                if np.any(np.isnan(tau)):
+                    continue
+                else:
+                    x = np.mean(tau)
+                    autocorr.append(x)
+                    converged1 = np.all(tau * crit1 < sampler.iteration)
+                    converged1 &= np.all(np.abs(old_tau - tau) / tau < 0.1)
+                    # log.info("The mean estimated integrated autocorrelation step is: " + str(x))
+                    old_tau = tau
+                    if converged1:
+                        log.info(
+                            "10 % convergence reached with a mean estimated integrated step: "
+                            + str(x)
+                        )
+                    else:
+                        continue
+            else:
+                continue
+        else:
+            if not converged2:
+                # Checks for convergence every csteps/4 steps (25 by default) instead of every csteps (100 by default), and requires the tau change to be below 1%
+                if sampler.iteration % int(csteps / 4) == 0:
+                    tau = sampler.get_autocorr_time(tol=0, quiet=True)
+                    if np.any(np.isnan(tau)):
+                        continue
+                    else:
+                        x = np.mean(tau)
+                        autocorr.append(x)
+                        converged2 = np.all(tau * crit1 < sampler.iteration)
+                        converged2 &= np.all(np.abs(old_tau - tau) / tau < 0.01)
+                        # log.info("The mean estimated integrated autocorrelation step is: " + str(x))
+                        old_tau = tau
+                        converge_step = sampler.iteration
+                else:
+                    continue
+            if converged2 and (sampler.iteration - burnin) >= 1000:
+                log.info(f"Convergence reached at {converge_step}")
+                break
+            else:
+                continue
+    return autocorr
+
+
 class emcee_fitter(Fitter):
     def __init__(
         self, toas=None, model=None, template=None, weights=None, phs=0.5, phserr=0.03
@@ -545,6 +621,13 @@ def main(argv=None):
         default=False,
         action="store_true",
     )
+    parser.add_argument(
+        "--no-autocorr",
+        help="Turn the autocorrelation check function off",
+        default=False,
+        action="store_true",
+        dest="noautocorr",
+    )
 
     args = parser.parse_args(argv)
     pint.logging.setup(
@@ -820,21 +903,29 @@ def unwrapped_lnpost(theta):
                     pool=pool,
                     backend=backend,
                 )
-                sampler.run_mcmc(pos, nsteps)
+                if args.noautocorr:
+                    sampler.run_mcmc(pos, nsteps, progress=True)
+                else:
+                    autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)
             pool.close()
             pool.join()
         except ImportError:
             log.info("Pathos module not available, using single core")
             sampler = emcee.EnsembleSampler(
                 nwalkers, ndim, ftr.lnposterior, blobs_dtype=dtype, backend=backend
             )
-            sampler.run_mcmc(pos, nsteps)
+            if args.noautocorr:
+                sampler.run_mcmc(pos, nsteps, progress=True)
+            else:
+                autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)
     else:
         sampler = emcee.EnsembleSampler(
             nwalkers, ndim, ftr.lnposterior, blobs_dtype=dtype, backend=backend
         )
-        # The number is the number of points in the chain
-        sampler.run_mcmc(pos, nsteps)
+        if args.noautocorr:
+            sampler.run_mcmc(pos, nsteps, progress=True)
+        else:
+            autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)
 
     def chains_to_dict(names, sampler):
         samples = np.transpose(sampler.get_chain(), (1, 0, 2))
diff --git a/tests/test_design_matrix.py b/tests/test_design_matrix.py
@@ -110,3 +110,8 @@ def test_combine_designmatrix_all(self):
             ]
             == 0.0
         )
+
+    def test_param_order(self):
+        params_dm = self.model.designmatrix(self.toas, incoffset=False)[1]
+        params_free = self.model.free_params
+        assert params_dm == params_free
diff --git a/tests/test_event_optimize.py b/tests/test_event_optimize.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+J0740+6620.cfr+19.tim`
	`2`	`+bench_*_summary`
Original file line number	Diff line number	Diff line change
`@@ -110,3 +110,8 @@ def test_combine_designmatrix_all(self):`
`110`	`110`	`]`
`111`	`111`	`== 0.0`
`112`	`112`	`)`
	`113`	`+`
	`114`	`+ def test_param_order(self):`
	`115`	`+ params_dm = self.model.designmatrix(self.toas, incoffset=False)[1]`
	`116`	`+ params_free = self.model.free_params`
	`117`	`+ assert params_dm == params_free`