@@ -6,6 +6,7 @@ from libc.math cimport (
66    sqrt,
77)
88from  libcpp.deque cimport deque
9+ from  libcpp.stack cimport stack
910from  libcpp.unordered_map cimport unordered_map
1011
1112from  pandas._libs.algos cimport TiebreakEnumType
@@ -988,39 +989,29 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
988989
989990#  ----------------------------------------------------------------------
990991
991- #  Moving maximum / minimum code taken from Bottleneck
992- #  Licence at LICENSES/BOTTLENECK_LICENCE
993- 
994- 
995- cdef float64_t init_mm(float64_t ai, Py_ssize_t * nobs, bint is_max) noexcept nogil:
996- 
997-     if  ai ==  ai:
998-         nobs[0 ] =  nobs[0 ] +  1 
999-     elif  is_max:
1000-         ai =  MINfloat64
1001-     else :
1002-         ai =  MAXfloat64
1003- 
1004-     return  ai
1005- 
1006- 
1007- cdef void  remove_mm(float64_t aold, Py_ssize_t * nobs) noexcept nogil:
1008-     """  remove a value from the mm calc """ 
1009-     if  aold ==  aold:
1010-         nobs[0 ] =  nobs[0 ] -  1 
1011- 
1012- 
1013- cdef float64_t calc_mm(int64_t minp, Py_ssize_t nobs,
1014-                        float64_t value) noexcept nogil:
1015-     cdef:
1016-         float64_t result
992+ cdef int64_t bisect_left(
993+     deque[int64_t]&  a,
994+     int64_t x,
995+     int64_t lo = 0 ,
996+     int64_t hi = - 1 
997+ ) nogil:
998+     """ Same as https://docs.python.org/3/library/bisect.html.""" 
999+ 
1000+     cdef int64_t mid
1001+     if  hi ==  - 1 :
1002+         hi =  a.size()
1003+     while  lo <  hi:
1004+         mid =  (lo +  hi) //  2 
1005+         if  a.at(mid) <  x:
1006+             lo =  mid +  1 
1007+         else :
1008+             hi =  mid
1009+     return  lo
10171010
1018-     if  nobs >=  minp:
1019-         result =  value
1020-     else :
1021-         result =  NaN
1011+ from  libc.math cimport isnan
10221012
1023-     return  result
1013+ #  Prior version of moving maximum / minimum code taken from Bottleneck
1014+ #  Licence at LICENSES/BOTTLENECK_LICENCE
10241015
10251016
10261017def  roll_max (ndarray[float64_t] values , ndarray[int64_t] start ,
@@ -1068,69 +1059,110 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
10681059    return _roll_min_max(values , start , end , minp , is_max = 0 )
10691060
10701061
1071- cdef _roll_min_max(ndarray[float64_t] values ,
1072-                    ndarray[int64_t] starti ,
1073-                    ndarray[int64_t] endi ,
1074-                    int64_t minp ,
1075-                    bint is_max ):
1062+ def _roll_min_max(
1063+     ndarray[float64_t] values ,
1064+     ndarray[int64_t] start ,
1065+     ndarray[int64_t] end ,
1066+     int64_t minp ,
1067+     bint is_max 
1068+ ):
10761069    cdef:
1077-         float64_t ai
1078-         int64_t curr_win_size, start
1079-         Py_ssize_t i, k, nobs =  0 , N =  len (starti)
1080-         deque Q[int64_t]  #  min/max always the front
1081-         deque W[int64_t]  #  track the whole window for nobs compute
1070+         Py_ssize_t i, i_next, k, valid_start, last_end, last_start, N =  len (start)
1071+         #  Indices of bounded extrema in `values`. `candidates[i]` is always increasing.
1072+         #  `values[candidates[i]]` is decreasing for max and increasing for min.
1073+         deque candidates[int64_t]
1074+         #  Indices of largest windows that "cover" preceding windows.
1075+         stack dominators[int64_t]
10821076        ndarray[float64_t, ndim= 1 ] output
10831077
1078+         Py_ssize_t this_start, this_end, stash_start
1079+         int64_t q_idx
1080+ 
10841081    output =  np.empty(N, dtype = np.float64)
1085-     Q =  deque[int64_t]()
1086-     W =  deque[int64_t]()
1082+     candidates =  deque[int64_t]()
1083+     dominators =  stack[int64_t]()
1084+ 
1085+     #  This function was "ported" / translated from sliding_min_max()
1086+     #  in /pandas/core/_numba/kernels/min_max_.py.
1087+     #  (See there for credits and some comments.)
1088+     #  Code translation assumptions/rules:
1089+     #  - min_periods --> minp
1090+     #  - deque[0] --> front()
1091+     #  - deque[-1] --> back()
1092+     #  - stack[-1] --> top()
1093+     #  - bool(stack/deque) --> !empty()
1094+     #  - deque.append()    --> push_back()
1095+     #  - stack.append()    --> push()
1096+     #  - deque.popleft     --> pop_front()
1097+     #  - deque.pop()       --> pop_back()
10871098
10881099    with  nogil:
1100+         if  minp <  1 :
1101+             minp =  1 
1102+ 
1103+         if  N> 2 :
1104+             i_next =  N -  1 
1105+             for  i in  range (N -  2 , - 1 , - 1 ):
1106+                 if  start[i_next] <  start[i] \
1107+                     and  (
1108+                            dominators.empty()
1109+                         or  start[dominators.top()] >  start[i_next]
1110+                 ):
1111+                     dominators.push(i_next)
1112+                 i_next =  i
1113+ 
1114+         #  NaN tracking to guarantee minp
1115+         valid_start =  - minp
1116+ 
1117+         last_end =  0 
1118+         last_start =  - 1 
10891119
1090-         #  This is using a modified version of the C++ code in this
1091-         #  SO post: https://stackoverflow.com/a/12239580
1092-         #  The original impl didn't deal with variable window sizes
1093-         #  So the code was optimized for that
1094- 
1095-         #  first window's size
1096-         curr_win_size =  endi[0 ] -  starti[0 ]
1097-         #  GH 32865
1098-         #  Anchor output index to values index to provide custom
1099-         #  BaseIndexer support
11001120        for  i in  range (N):
1121+             this_start =  start[i]
1122+             this_end =  end[i]
11011123
1102-             curr_win_size =  endi[i] -  starti[i]
1103-             if  i ==  0 :
1104-                 start =  starti[i]
1105-             else :
1106-                 start =  endi[i -  1 ]
1107- 
1108-             for  k in  range (start, endi[i]):
1109-                 ai =  init_mm(values[k], & nobs, is_max)
1110-                 #  Discard previous entries if we find new min or max
1111-                 if  is_max:
1112-                     while  not  Q.empty() and  ((ai >=  values[Q.back()]) or 
1113-                                              values[Q.back()] !=  values[Q.back()]):
1114-                         Q.pop_back()
1115-                 else :
1116-                     while  not  Q.empty() and  ((ai <=  values[Q.back()]) or 
1117-                                              values[Q.back()] !=  values[Q.back()]):
1118-                         Q.pop_back()
1119-                 Q.push_back(k)
1120-                 W.push_back(k)
1121- 
1122-             #  Discard entries outside and left of current window
1123-             while  not  Q.empty() and  Q.front() <=  starti[i] -  1 :
1124-                 Q.pop_front()
1125-             while  not  W.empty() and  W.front() <=  starti[i] -  1 :
1126-                 remove_mm(values[W.front()], & nobs)
1127-                 W.pop_front()
1128- 
1129-             #  Save output based on index in input value array
1130-             if  not  Q.empty() and  curr_win_size >  0 :
1131-                 output[i] =  calc_mm(minp, nobs, values[Q.front()])
1124+             if  (not  dominators.empty() and  dominators.top() ==  i):
1125+                 dominators.pop()
1126+ 
1127+             if  not  (this_end >  last_end
1128+                     or  (this_end ==  last_end and  this_start >=  last_start)):
1129+                 raise  ValueError (
1130+                     " Start/End ordering requirement is violated at index {}" 
1131+ 
1132+             if  dominators.empty():
1133+                 stash_start =  this_start
11321134            else :
1135+                 stash_start =  min (this_start, start[dominators.top()])
1136+ 
1137+             while  not  candidates.empty() and  candidates.front() <  stash_start:
1138+                 candidates.pop_front()
1139+ 
1140+             for  k in  range (last_end, this_end):
1141+                 if  not  isnan(values[k]):
1142+                     valid_start +=  1 
1143+                     while  valid_start >=  0  and  isnan(values[valid_start]):
1144+                         valid_start +=  1 
1145+ 
1146+                     if  is_max:
1147+                         while  (not  candidates.empty()
1148+                                 and  values[k] >=  values[candidates.back()]):
1149+                             candidates.pop_back()
1150+                     else :
1151+                         while  (not  candidates.empty()
1152+                                 and  values[k] <=  values[candidates.back()]):
1153+                             candidates.pop_back()
1154+                     candidates.push_back(k)
1155+ 
1156+             if  candidates.empty() or  this_start >  valid_start:
11331157                output[i] =  NaN
1158+             elif  candidates.front() >=  this_start:
1159+                 #  ^^ This is here to avoid costly bisection for fixed window sizes.
1160+                 output[i] =  values[candidates.front()]
1161+             else :
1162+                 q_idx =  bisect_left(candidates, this_start, lo = 1 )
1163+                 output[i] =  values[candidates[q_idx]]
1164+             last_end =  this_end
1165+             last_start =  this_start
11341166
11351167    return  output
11361168
0 commit comments