@@ -382,6 +382,9 @@ def group_any_all(uint8_t[:] out,
382382 if values[i] == flag_val:
383383 out[lab] = flag_val
384384
385+ # ----------------------------------------------------------------------
386+ # group_add, group_prod, group_var, group_mean, group_ohlc
387+ # ----------------------------------------------------------------------
385388
386389@ cython.wraparound (False )
387390@ cython.boundscheck (False )
@@ -433,5 +436,212 @@ def _group_add(floating[:, :] out,
433436group_add_float32 = _group_add[' float' ]
434437group_add_float64 = _group_add[' double' ]
435438
439+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
    """
    Compute the per-group product of ``values``; only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Result buffer, written in place. Row ``i`` receives the products
        for group ``i``, for ``i`` in ``range(len(counts))``.
    counts : int64_t[:]
        Incremented in place once for every row of ``values`` whose label
        is non-negative, regardless of whether that row holds NaN.
    values : floating[:, :]
        Input data. NaN entries do not contribute to the product.
    labels : const int64_t[:]
        Group index for each row of ``values``. Rows with a negative
        label are skipped entirely.
    min_count : Py_ssize_t, default 0
        A cell with fewer than ``min_count`` non-NaN observations is set
        to NaN instead of its product.

    Raises
    ------
    AssertionError
        If ``values`` and ``labels`` do not have the same length.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val
        ndarray[floating, ndim=2] prodx, nobs

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    # Accumulators shaped like ``out``: the count of non-NaN observations
    # and the running product (1 is the multiplicative identity).
    nobs = np.zeros_like(out)
    prodx = np.ones_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                # not nan
                if val == val:
                    nobs[lab, j] += 1
                    prodx[lab, j] *= val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = NAN
                else:
                    out[i, j] = prodx[i, j]


group_prod_float32 = _group_prod[' float']
group_prod_float64 = _group_prod[' double']
487+
488+
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
               int64_t[:] counts,
               floating[:, :] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
    """
    Compute the per-group sample variance of ``values`` along axis=0.

    ``out`` is overwritten in place and ``counts`` is incremented once per
    row carrying a non-negative label. NaN entries are ignored. The sum of
    squared deviations is divided by (observations - 1), and any cell with
    fewer than two non-NaN observations is set to NaN.

    ``min_count`` is accepted only for signature parity with the other
    kernels and must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1, or if ``values`` and ``labels`` do not
        have the same length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n_seen, prev
        ndarray[floating, ndim=2] seen, avg

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    seen = np.zeros_like(out)
    avg = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    # ``out`` doubles as the accumulator for squared deviations
    out[:, :] = 0.0

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp < 0:
                continue

            counts[grp] += 1

            for col in range(ncols):
                x = values[row, col]

                # NaN is the only value that compares unequal to itself
                if x != x:
                    continue

                # single-pass update of the running mean and of the sum
                # of squared deviations from it
                seen[grp, col] += 1
                prev = avg[grp, col]
                avg[grp, col] += (x - prev) / seen[grp, col]
                out[grp, col] += (x - avg[grp, col]) * (x - prev)

        for row in range(ngroups):
            for col in range(ncols):
                n_seen = seen[row, col]
                if n_seen < 2:
                    out[row, col] = NAN
                else:
                    out[row, col] /= (n_seen - 1)


group_var_float32 = _group_var[' float']
group_var_float64 = _group_var[' double']
543+
544+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute the per-group arithmetic mean of ``values`` along axis=0.

    ``out`` is written in place and ``counts`` is incremented once per row
    carrying a non-negative label. NaN entries are excluded from both the
    sum and the divisor; a cell with no non-NaN observation is set to NaN.

    ``min_count`` is accepted only for signature parity with the other
    kernels and must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1, or if ``values`` and ``labels`` do not
        have the same length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n_seen
        ndarray[floating, ndim=2] totals, seen

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    seen = np.zeros_like(out)
    totals = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp < 0:
                continue

            counts[grp] += 1
            for col in range(ncols):
                x = values[row, col]

                # NaN is the only value that compares unequal to itself
                if x != x:
                    continue

                seen[grp, col] += 1
                totals[grp, col] += x

        for row in range(ngroups):
            for col in range(ncols):
                n_seen = seen[row, col]
                if n_seen == 0:
                    out[row, col] = NAN
                else:
                    out[row, col] = totals[row, col] / n_seen


group_mean_float32 = _group_mean[' float']
group_mean_float64 = _group_mean[' double']
592+
593+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute per-group open/high/low/close; only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Result buffer with exactly 4 columns, overwritten in place. For
        each group the columns hold, in order: the first non-NaN value,
        the maximum, the minimum and the last non-NaN value. Groups with
        no non-NaN value are left as NaN.
    counts : int64_t[:]
        Incremented in place once for every row whose label is
        non-negative, regardless of whether the value is NaN.
    values : floating[:, :]
        Input data; only column 0 is read, and more than one column is
        rejected.
    labels : const int64_t[:]
        Group index for each row of ``values``. Rows with a negative
        label are skipped. If empty, the function returns immediately
        without touching ``out``.
    min_count : Py_ssize_t, default -1
        Accepted only for signature parity with the other kernels; must
        be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1.
    ValueError
        If ``out`` does not have 4 columns.
    NotImplementedError
        If ``values`` has more than one column.
    """
    cdef:
        Py_ssize_t i, N, K, lab
        floating val

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(labels) == 0:
        return

    N, K = (<object>values).shape

    if out.shape[1] != 4:
        raise ValueError(' Output array must have 4 columns')

    if K > 1:
        raise NotImplementedError(" Argument 'values' must have only "
                                  " one dimension")
    # NaN in column 0 marks a group that has not yet seen a valid value
    out[:] = np.nan

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Skip every negative label, as the sibling kernels do: bounds
            # checking and wraparound are disabled, so a negative index
            # would read and write outside the buffers.
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i, 0]
            # NaN is the only value that compares unequal to itself
            if val != val:
                continue

            if out[lab, 0] != out[lab, 0]:
                # first valid observation seeds all four columns
                out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
            else:
                out[lab, 1] = max(out[lab, 1], val)
                out[lab, 2] = min(out[lab, 2], val)
                out[lab, 3] = val


group_ohlc_float32 = _group_ohlc[' float']
group_ohlc_float64 = _group_ohlc[' double']
645+
436646# generated from template
437647include " groupby_helper.pxi"
0 commit comments