Skip to content

Commit

Permalink
Implemented Phipson-Smyth adjustment for permutation p-values in effs…
Browse files Browse the repository at this point in the history
…ize_objects (not yet implemented in delta_objects)
  • Loading branch information
mlotinga committed Aug 9, 2024
1 parent 1cd2bb5 commit 4067f67
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
43 changes: 39 additions & 4 deletions dabest/_effsize_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import pandas as pd
import lqrt
from scipy.stats import norm
from scipy.special import binom as binomcoeff # devMJBL
from scipy.stats import binom # devMJBL
from scipy.integrate import fixed_quad # devMJBL
from numpy import arange, mean # devMJBL
from numpy import array, isnan, isinf, repeat, random, isin, abs, var
from numpy import sort as npsort
from numpy import nan as npnan
Expand Down Expand Up @@ -1006,9 +1010,9 @@ def plot(
barplot_kwargs=None,
violinplot_kwargs=None,
slopegraph_kwargs=None,
slopegraph_xjitter=0,
slopegraph_yjitter=0,
jitter_seed=9876543210,
slopegraph_xjitter=0, # devMJBL
slopegraph_yjitter=0, # devMJBL
jitter_seed=9876543210, # devMJBL
sankey_kwargs=None,
reflines_kwargs=None,
group_summary_kwargs=None,
Expand Down Expand Up @@ -1430,6 +1434,7 @@ def __init__(self, control: array,

BAG = array([*control, *test])
CONTROL_LEN = int(len(control))
TEST_LEN = int(len(test)) # devMJBL
EXTREME_COUNT = 0.
THRESHOLD = abs(two_group_difference(control, test,
is_paired, effect_size))
Expand Down Expand Up @@ -1470,10 +1475,40 @@ def __init__(self, control: array,
if abs(es) > THRESHOLD:
EXTREME_COUNT += 1.

# devMJBL
# adjust calculated p-value according to Phipson & Smyth (2010)
# https://doi.org/10.2202/1544-6115.1585
# as per R code in statmod::permp
# https://rdrr.io/cran/statmod/src/R/permp.R
# (assumes two-sided test)

if CONTROL_LEN == TEST_LEN:
totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)/2
else:
totalPermutations = binomcoeff(CONTROL_LEN + TEST_LEN, TEST_LEN)

if totalPermutations <= 10e3:
# use exact calculation
p = arange(1, totalPermutations + 1)/totalPermutations
x2 = repeat(EXTREME_COUNT, repeats=totalPermutations)
Y = binom.cdf(k=x2, n=permutation_count, p=p)
self.pvalue = mean(Y)
else:
# use integral approximation
def binomcdf(p, k, n):
return binom.cdf(k, n, p)

integrationVal, _ = fixed_quad(binomcdf,
a=0, b=0.5/totalPermutations,
args=(EXTREME_COUNT, permutation_count),
n=128)

self.pvalue = (EXTREME_COUNT + 1)/(permutation_count + 1) - integrationVal

self.__permutations = array(self.__permutations)
self.__permutations_var = array(self.__permutations_var)

self.pvalue = EXTREME_COUNT / self.__permutation_count
# self.pvalue = EXTREME_COUNT / self.__permutation_count


def __repr__(self):
Expand Down
6 changes: 3 additions & 3 deletions dabest/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
columns=xvar,
values=pivot_values,
)
rng = np.random.default_rng(plot_kwargs["jitter_seed"])
rng = np.random.default_rng(plot_kwargs["jitter_seed"]) # devMJBL
x_start = 0
for ii, current_tuple in enumerate(temp_idx):
current_pair = pivoted_plot_data.loc[
Expand All @@ -490,8 +490,8 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs):
grp_count = len(current_tuple)
# Iterate through the data for the current tuple.
for ID, observation in current_pair.iterrows():
x_points = [t + plot_kwargs["slopegraph_xjitter"]*rng.standard_t(df=6, size=None) for t in range(x_start, x_start + grp_count)]
y_points = np.array(observation[yvar].tolist()) + plot_kwargs["slopegraph_yjitter"]*rng.standard_t(df=6, size=len(observation[yvar].tolist()))
x_points = [t + plot_kwargs["slopegraph_xjitter"]*rng.standard_t(df=6, size=None) for t in range(x_start, x_start + grp_count)] # devMJBL
y_points = np.array(observation[yvar].tolist()) + plot_kwargs["slopegraph_yjitter"]*rng.standard_t(df=6, size=len(observation[yvar].tolist())) # devMJBL

if color_col is None:
slopegraph_kwargs["color"] = ytick_color
Expand Down

0 comments on commit 4067f67

Please sign in to comment.