Skip to content

Commit

Permalink
refac: renamed variables from MLL_score. Added docstrings for MLL_mag…
Browse files Browse the repository at this point in the history
…nitude_test and MLL_score.

docs: updated URLs for intersphinx. Added new M-tests to the API reference.
  • Loading branch information
pabloitu committed Oct 21, 2024
1 parent d71efcf commit 43d57d2
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 46 deletions.
43 changes: 26 additions & 17 deletions csep/core/catalog_evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,16 +362,17 @@ def resampled_magnitude_test(forecast: "CatalogForecast",
"""
Performs the resampled magnitude test for catalog-based forecasts (Serafini et al., 2024),
which corrects the bias from the original M-test implementation to the total N of events.
Calculates the pseudo log-likelihood distribution from the synthetic catalog Λ_j as:
Calculates the (pseudo log-likelihood) test statistic distribution from the forecast's
synthetic catalogs Λ_j as:
D_j = Σ_k [log(Λ_U(k) / N_U + 1) - log(Λ̃_j(k) + 1)] ^ 2
D_j = Σ_k [log(Λ_u(k) * N / N_u + 1) - log(Λ̃_j(k) + 1)] ^ 2
where k are the magnitude bins, Λ_U the union of all synthetic catalogs, N_U the total
number of events in Λ_U, and Λ̃_j the resampled catalog containing exactly N events.
where k are the magnitude bins, Λ_u the union of all synthetic catalogs, N_u the total
number of events in Λ_u, and Λ̃_j the resampled catalog containing exactly N events.
The pseudo log-likelihood score from the observations is calculated as:
The pseudo log-likelihood statistic from the observations is calculated as:
D_o = Σ_k [log(Λ_U(k) / N_U + 1) - log(Ω(k) + 1)]^2
D_o = Σ_k [log(Λ_U(k) * N / N_u + 1) - log(Ω(k) + 1)]^2
where Ω is the observed catalog.
Expand Down Expand Up @@ -488,9 +489,22 @@ def MLL_magnitude_test(forecast: "CatalogForecast",
verbose: bool = False,
seed: Optional[int] = None) -> CatalogMagnitudeTestResult:
"""
Implements the modified Multinomial likelihood log-ratio (MLL) magnitude test (Serafini et
al., 2024).
Implements the modified Multinomial log-likelihood ratio (MLL) magnitude test (Serafini et
al., 2024). Calculates the test statistic distribution as:
D̃_j = -2 * log( L(Λ_u + N_u / N_j + Λ̃_j + 1) /
[L(Λ_u + N_u / N_j) * L(Λ̃_j + 1)]
)
where L is the multinomial likelihood function, Λ_u the union of all the forecasts'
synthetic catalogs, N_u the total number of events in Λ_u, Λ̃_j the resampled catalog
containing exactly N observed events. The observed statistic is defined as:
D_o = -2 * log( L(Λ_u + N_u / N + Ω + 1) /
[L(Λ_u + N_u / N) * L(Ω + 1)]
)
where Ω is the observed catalog.
Args:
forecast (CatalogForecast): The forecast to be evaluated
Expand Down Expand Up @@ -553,8 +567,8 @@ def MLL_magnitude_test(forecast: "CatalogForecast",
n_obs = numpy.sum(Omega_histogram)

# compute observed statistic
obs_d_statistic = MLL_score(Lambda_U_counts=Lambda_u_histogram,
Lambda_j_counts=Omega_histogram)
obs_d_statistic = MLL_score(union_catalog_counts=Lambda_u_histogram,
catalog_counts=Omega_histogram)

probs = Lambda_u_histogram / numpy.sum(Lambda_u_histogram)

Expand All @@ -571,16 +585,11 @@ def MLL_magnitude_test(forecast: "CatalogForecast",
Lambda_j_histogram, tmp = numpy.histogram(mag_values,
bins=numpy.append(forecast.magnitudes,
extended_mag_max))
# end new
n_events = numpy.sum(Lambda_j_histogram)
# if n_events == 0:
# print("Skipping to next because catalog contained zero events.")
# continue

# compute magnitude test statistic for the catalog
test_distribution.append(
MLL_score(Lambda_U_counts=Lambda_u_histogram,
Lambda_j_counts=Lambda_j_histogram)
MLL_score(union_catalog_counts=Lambda_u_histogram,
catalog_counts=Lambda_j_histogram)
)
# output status
if verbose:
Expand Down
72 changes: 46 additions & 26 deletions csep/utils/stats.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from typing import Sequence

import numpy
import scipy.stats
import scipy.special
# PyCSEP imports
from csep.core import regions
import scipy.stats


def sup_dist(cdf1, cdf2):
"""
Expand Down Expand Up @@ -274,35 +271,58 @@ def get_Kagan_I1_score(forecasts, catalog):


def log_d_multinomial(x: numpy.ndarray, size: int, prob: numpy.ndarray):
"""
Args:
x:
size:
prob:
Returns:
"""
return scipy.special.loggamma(size + 1) + numpy.sum(
x * numpy.log(prob) - scipy.special.loggamma(x + 1))


def MLL_score(Lambda_U_counts: numpy.ndarray, Lambda_j_counts: numpy.ndarray):
def MLL_score(union_catalog_counts: numpy.ndarray, catalog_counts: numpy.ndarray):
"""
Calculates the modified Multinomial log-likelihood (MLL) score, defined by Serafini et al.,
(2024). It is built from a collection catalogs Λ_u and a single catalog Ω
MLL_score = 2 * log( L(Λ_u + N_u / N_o + Ω + 1) /
[L(Λ_u + N_u / N_o) * L(Ω + 1)]
)
where N_u and N_j are the total number of events in Λ_u and Ω, respectively.
Args:
Lambda_U_counts:
Lambda_j_counts:
union_catalog_counts (numpy.ndarray):
catalog_counts (numpy.ndarray):
Returns:
The MLL score for the collection of catalogs and
"""
N_u = numpy.sum(Lambda_U_counts)
N_j = numpy.sum(Lambda_j_counts)
coef_ = N_u / N_j
Lambda_U_mod = Lambda_U_counts + coef_
Lambda_j_mod = Lambda_j_counts + 1
Lambda_merged = Lambda_U_mod + Lambda_j_mod

pr_merged = Lambda_merged / numpy.sum(Lambda_merged)
pr_U = Lambda_U_mod / numpy.sum(Lambda_U_mod)
pr_j = Lambda_j_mod / numpy.sum(Lambda_j_mod)

loglik_merged = log_d_multinomial(x=Lambda_merged, size=numpy.sum(Lambda_merged),
prob=pr_merged)
loglik_U = log_d_multinomial(x=Lambda_U_mod, size=numpy.sum(Lambda_U_mod), prob=pr_U)
loglik_j = log_d_multinomial(x=Lambda_j_mod, size=numpy.sum(Lambda_j_mod), prob=pr_j)

return 2 * (loglik_merged - loglik_U - loglik_j)

N_u = numpy.sum(union_catalog_counts)
N_j = numpy.sum(catalog_counts)
events_ratio = N_u / N_j

union_catalog_counts_mod = union_catalog_counts + events_ratio
catalog_counts_mod = catalog_counts + 1
merged_catalog_j = union_catalog_counts_mod + catalog_counts_mod

pr_merged_cat = merged_catalog_j / numpy.sum(merged_catalog_j)
pr_union_cat = union_catalog_counts_mod / numpy.sum(union_catalog_counts_mod)
pr_cat_j = catalog_counts_mod / numpy.sum(catalog_counts_mod)

log_lik_merged = log_d_multinomial(x=merged_catalog_j,
size=numpy.sum(merged_catalog_j),
prob=pr_merged_cat)
log_lik_union = log_d_multinomial(x=union_catalog_counts_mod,
size=numpy.sum(union_catalog_counts_mod),
prob=pr_union_cat)
log_like_cat_j = log_d_multinomial(x=catalog_counts_mod,
size=numpy.sum(catalog_counts_mod),
prob=pr_cat_j)

return 2 * (log_lik_merged - log_lik_union - log_like_cat_j)
6 changes: 3 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@
# intersphinx configuration
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
"numpy": (" https://numpy.org/doc/stable/", None),
"pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
"scipy": ('http://docs.scipy.org/doc/scipy/reference', None),
"matplotlib": ('http://matplotlib.sourceforge.net/', None)
"scipy": ('https://docs.scipy.org/doc/scipy/', None),
"matplotlib": ('https://matplotlib.org/stable', None)
}

html_theme_options = {}
Expand Down
2 changes: 2 additions & 0 deletions docs/reference/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ Catalog-based forecast evaluations:
number_test
spatial_test
magnitude_test
resampled_magnitude_test
MLL_magnitude_test
pseudolikelihood_test
calibration_test

Expand Down

0 comments on commit 43d57d2

Please sign in to comment.