22
33import numpy as np
44
5+ from pandas ._libs import lib
56from pandas ._typing import (
67 ArrayLike ,
8+ Scalar ,
79 npt ,
810)
11+ from pandas .compat .numpy import np_percentile_argname
912
1013from pandas .core .dtypes .missing import (
1114 isna ,
1215 na_value_for_dtype ,
1316)
1417
15- from pandas .core .nanops import nanpercentile
16-
1718
1819def quantile_compat (
1920 values : ArrayLike , qs : npt .NDArray [np .float64 ], interpolation : str
@@ -41,7 +42,7 @@ def quantile_compat(
4142
4243def quantile_with_mask (
4344 values : np .ndarray ,
44- mask : np .ndarray ,
45+ mask : npt . NDArray [ np .bool_ ] ,
4546 fill_value ,
4647 qs : npt .NDArray [np .float64 ],
4748 interpolation : str ,
@@ -84,10 +85,9 @@ def quantile_with_mask(
8485 flat = np .array ([fill_value ] * len (qs ))
8586 result = np .repeat (flat , len (values )).reshape (len (values ), len (qs ))
8687 else :
87- # asarray needed for Sparse, see GH#24600
88- result = nanpercentile (
88+ result = _nanpercentile (
8989 values ,
90- np . array ( qs ) * 100 ,
90+ qs * 100.0 ,
9191 na_value = fill_value ,
9292 mask = mask ,
9393 interpolation = interpolation ,
@@ -97,3 +97,92 @@ def quantile_with_mask(
9797 result = result .T
9898
9999 return result
100+
101+
102+ def _nanpercentile_1d (
103+ values : np .ndarray ,
104+ mask : npt .NDArray [np .bool_ ],
105+ qs : npt .NDArray [np .float64 ],
106+ na_value : Scalar ,
107+ interpolation ,
108+ ) -> Scalar | np .ndarray :
109+ """
110+ Wrapper for np.percentile that skips missing values, specialized to
111+ 1-dimensional case.
112+
113+ Parameters
114+ ----------
115+ values : array over which to find quantiles
116+ mask : ndarray[bool]
117+ locations in values that should be considered missing
118+ qs : np.ndarray[float64] of quantile indices to find
119+ na_value : scalar
120+ value to return for empty or all-null values
121+ interpolation : str
122+
123+ Returns
124+ -------
125+ quantiles : scalar or array
126+ """
127+ # mask is Union[ExtensionArray, ndarray]
128+ values = values [~ mask ]
129+
130+ if len (values ) == 0 :
131+ return np .array ([na_value ] * len (qs ), dtype = values .dtype )
132+
133+ return np .percentile (values , qs , ** {np_percentile_argname : interpolation })
134+
135+
136+ def _nanpercentile (
137+ values : np .ndarray ,
138+ qs : npt .NDArray [np .float64 ],
139+ * ,
140+ na_value ,
141+ mask : npt .NDArray [np .bool_ ],
142+ interpolation ,
143+ ):
144+ """
145+ Wrapper for np.percentile that skips missing values.
146+
147+ Parameters
148+ ----------
149+ values : np.ndarray[ndim=2] over which to find quantiles
150+ qs : np.ndarray[float64] of quantile indices to find
151+ na_value : scalar
152+ value to return for empty or all-null values
153+ mask : np.ndarray[bool]
154+ locations in values that should be considered missing
155+ interpolation : str
156+
157+ Returns
158+ -------
159+ quantiles : scalar or array
160+ """
161+
162+ if values .dtype .kind in ["m" , "M" ]:
163+ # need to cast to integer to avoid rounding errors in numpy
164+ result = _nanpercentile (
165+ values .view ("i8" ),
166+ qs = qs ,
167+ na_value = na_value .view ("i8" ),
168+ mask = mask ,
169+ interpolation = interpolation ,
170+ )
171+
172+ # Note: we have to do `astype` and not view because in general we
173+ # have float result at this point, not i8
174+ return result .astype (values .dtype )
175+
176+ if not lib .is_scalar (mask ) and mask .any ():
177+ # Caller is responsible for ensuring mask shape match
178+ assert mask .shape == values .shape
179+ result = [
180+ _nanpercentile_1d (val , m , qs , na_value , interpolation = interpolation )
181+ for (val , m ) in zip (list (values ), list (mask ))
182+ ]
183+ result = np .array (result , dtype = values .dtype , copy = False ).T
184+ return result
185+ else :
186+ return np .percentile (
187+ values , qs , axis = 1 , ** {np_percentile_argname : interpolation }
188+ )
0 commit comments