refac: renamed variables from MLL_score. Added docstrings for MLL_mag…

…nitude_test and MLL_score. docs: updated URLs for intersphinx. Added new M-tests to the API reference.
SCECcode · Oct 21, 2024 · 43d57d2 · 43d57d2
1 parent d71efcf
commit 43d57d2
Show file tree

Hide file tree

Showing 4 changed files with 77 additions and 46 deletions.
diff --git a/csep/core/catalog_evaluations.py b/csep/core/catalog_evaluations.py
@@ -362,16 +362,17 @@ def resampled_magnitude_test(forecast: "CatalogForecast",
     """
     Performs the resampled magnitude test for catalog-based forecasts (Serafini et al., 2024),
     which corrects the bias from the original M-test implementation to the total N of events.
-    Calculates the pseudo log-likelihood distribution from the synthetic catalog Λ_j as:
+    Calculates the (pseudo log-likelihood) test statistic distribution from the forecast's
+    synthetic catalogs Λ_j as:
 
-        D_j = Σ_k [log(Λ_U(k) / N_U + 1) - log(Λ̃_j(k) + 1)] ^ 2
+        D_j = Σ_k [log(Λ_u(k) * N / N_u + 1) - log(Λ̃_j(k) + 1)] ^ 2
 
-    where k are the magnitude bins,  Λ_U the union of all synthetic catalogs, N_U the total
-    number of events in Λ_U,  and Λ̃_j the resampled catalog containing exactly N events.
+    where k are the magnitude bins, Λ_u the union of all synthetic catalogs, N_u the total
+    number of events in Λ_u, and Λ̃_j the resampled catalog containing exactly N events.
 
-    The pseudo log-likelihood score from the observations is calculated as:
+    The pseudo log-likelihood statistic from the observations is calculated as:
 
-        D_o = Σ_k [log(Λ_U(k) / N_U + 1) - log(Ω(k) + 1)]^2
+        D_o = Σ_k [log(Λ_U(k) * N / N_u + 1) - log(Ω(k) + 1)]^2
 
     where Ω is the observed catalog.
 
@@ -488,9 +489,22 @@ def MLL_magnitude_test(forecast: "CatalogForecast",
                        verbose: bool = False,
                        seed: Optional[int] = None) -> CatalogMagnitudeTestResult:
     """
-    Implements the modified Multinomial likelihood log-ratio (MLL) magnitude test (Serafini et
-    al., 2024).
+    Implements the modified Multinomial log-likelihood ratio (MLL) magnitude test (Serafini et
+    al., 2024). Calculates the test statistic distribution as:
 
+        D̃_j = -2 * log( L(Λ_u + N_u / N_j + Λ̃_j + 1) /
+                       [L(Λ_u + N_u / N_j) * L(Λ̃_j + 1)]
+                       )
+
+    where L is the multinomial likelihood function, Λ_u the union of all the forecasts'
+    synthetic catalogs, N_u the total number of events in Λ_u, Λ̃_j the resampled catalog
+    containing exactly N observed events. The observed statistic is defined as:
+
+        D_o = -2 * log( L(Λ_u + N_u / N + Ω + 1) /
+                       [L(Λ_u + N_u / N) * L(Ω + 1)]
+                       )
+
+    where Ω is the observed catalog.
 
     Args:
         forecast (CatalogForecast): The forecast to be evaluated
@@ -553,8 +567,8 @@ def MLL_magnitude_test(forecast: "CatalogForecast",
     n_obs = numpy.sum(Omega_histogram)
 
     # compute observed statistic
-    obs_d_statistic = MLL_score(Lambda_U_counts=Lambda_u_histogram,
-                                Lambda_j_counts=Omega_histogram)
+    obs_d_statistic = MLL_score(union_catalog_counts=Lambda_u_histogram,
+                                catalog_counts=Omega_histogram)
 
     probs = Lambda_u_histogram / numpy.sum(Lambda_u_histogram)
 
@@ -571,16 +585,11 @@ def MLL_magnitude_test(forecast: "CatalogForecast",
         Lambda_j_histogram, tmp = numpy.histogram(mag_values,
                                                   bins=numpy.append(forecast.magnitudes,
                                                                     extended_mag_max))
-        # end new
-        n_events = numpy.sum(Lambda_j_histogram)
-        # if n_events == 0:
-        # print("Skipping to next because catalog contained zero events.")
-        #   continue
 
         # compute magnitude test statistic for the catalog
         test_distribution.append(
-            MLL_score(Lambda_U_counts=Lambda_u_histogram,
-                      Lambda_j_counts=Lambda_j_histogram)
+            MLL_score(union_catalog_counts=Lambda_u_histogram,
+                      catalog_counts=Lambda_j_histogram)
         )
         # output status
         if verbose:

diff --git a/csep/utils/stats.py b/csep/utils/stats.py
@@ -1,10 +1,7 @@
-from typing import Sequence
-
 import numpy
-import scipy.stats
 import scipy.special
-# PyCSEP imports
-from csep.core import regions
+import scipy.stats
+
 
 def sup_dist(cdf1, cdf2):
     """
@@ -274,35 +271,58 @@ def get_Kagan_I1_score(forecasts, catalog):
 
 
 def log_d_multinomial(x: numpy.ndarray, size: int, prob: numpy.ndarray):
+    """
+
+    Args:
+        x:
+        size:
+        prob:
 
+    Returns:
+
+    """
     return scipy.special.loggamma(size + 1) + numpy.sum(
         x * numpy.log(prob) - scipy.special.loggamma(x + 1))
 
 
-def MLL_score(Lambda_U_counts: numpy.ndarray, Lambda_j_counts: numpy.ndarray):
+def MLL_score(union_catalog_counts: numpy.ndarray, catalog_counts: numpy.ndarray):
     """
+    Calculates the modified Multinomial log-likelihood (MLL) score, defined by Serafini et al.,
+    (2024). It is built from a collection catalogs Λ_u and a single catalog Ω
+
+        MLL_score = 2 * log( L(Λ_u + N_u / N_o + Ω + 1) /
+                           [L(Λ_u + N_u / N_o) * L(Ω + 1)]
+                           )
+    where N_u and N_j are the total number of events in Λ_u and Ω, respectively.
 
     Args:
-        Lambda_U_counts:
-        Lambda_j_counts:
+        union_catalog_counts (numpy.ndarray):
+        catalog_counts (numpy.ndarray):
 
     Returns:
-
+        The MLL score for the collection of catalogs and
     """
-    N_u = numpy.sum(Lambda_U_counts)
-    N_j = numpy.sum(Lambda_j_counts)
-    coef_ = N_u / N_j
-    Lambda_U_mod = Lambda_U_counts + coef_
-    Lambda_j_mod = Lambda_j_counts + 1
-    Lambda_merged = Lambda_U_mod + Lambda_j_mod
-
-    pr_merged = Lambda_merged / numpy.sum(Lambda_merged)
-    pr_U = Lambda_U_mod / numpy.sum(Lambda_U_mod)
-    pr_j = Lambda_j_mod / numpy.sum(Lambda_j_mod)
-
-    loglik_merged = log_d_multinomial(x=Lambda_merged, size=numpy.sum(Lambda_merged),
-                                      prob=pr_merged)
-    loglik_U = log_d_multinomial(x=Lambda_U_mod, size=numpy.sum(Lambda_U_mod), prob=pr_U)
-    loglik_j = log_d_multinomial(x=Lambda_j_mod, size=numpy.sum(Lambda_j_mod), prob=pr_j)
-
-    return 2 * (loglik_merged - loglik_U - loglik_j)
+
+    N_u = numpy.sum(union_catalog_counts)
+    N_j = numpy.sum(catalog_counts)
+    events_ratio = N_u / N_j
+
+    union_catalog_counts_mod = union_catalog_counts + events_ratio
+    catalog_counts_mod = catalog_counts + 1
+    merged_catalog_j = union_catalog_counts_mod + catalog_counts_mod
+
+    pr_merged_cat = merged_catalog_j / numpy.sum(merged_catalog_j)
+    pr_union_cat = union_catalog_counts_mod / numpy.sum(union_catalog_counts_mod)
+    pr_cat_j = catalog_counts_mod / numpy.sum(catalog_counts_mod)
+
+    log_lik_merged = log_d_multinomial(x=merged_catalog_j,
+                                       size=numpy.sum(merged_catalog_j),
+                                       prob=pr_merged_cat)
+    log_lik_union = log_d_multinomial(x=union_catalog_counts_mod,
+                                      size=numpy.sum(union_catalog_counts_mod),
+                                      prob=pr_union_cat)
+    log_like_cat_j = log_d_multinomial(x=catalog_counts_mod,
+                                       size=numpy.sum(catalog_counts_mod),
+                                       prob=pr_cat_j)
+
+    return 2 * (log_lik_merged - log_lik_union - log_like_cat_j)
diff --git a/docs/conf.py b/docs/conf.py
@@ -99,10 +99,10 @@
 # intersphinx configuration
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
-    "numpy": ("https://docs.scipy.org/doc/numpy/", None),
+    "numpy": (" https://numpy.org/doc/stable/", None),
     "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
-    "scipy": ('http://docs.scipy.org/doc/scipy/reference', None),
-    "matplotlib": ('http://matplotlib.sourceforge.net/', None)
+    "scipy": ('https://docs.scipy.org/doc/scipy/', None),
+    "matplotlib": ('https://matplotlib.org/stable', None)
 }
 
 html_theme_options = {}

diff --git a/docs/reference/api_reference.rst b/docs/reference/api_reference.rst
@@ -159,6 +159,8 @@ Catalog-based forecast evaluations:
    number_test
    spatial_test
    magnitude_test
+   resampled_magnitude_test
+   MLL_magnitude_test
    pseudolikelihood_test
    calibration_test