@@ -382,6 +382,10 @@ def group_any_all(uint8_t[:] out,
382382 if values[i] == flag_val:
383383 out[lab] = flag_val
384384
385+ # ----------------------------------------------------------------------
386+ # group_add, group_prod, group_var, group_mean, group_ohlc
387+ # ----------------------------------------------------------------------
388+
385389
386390@ cython.wraparound (False )
387391@ cython.boundscheck (False )
@@ -433,5 +437,213 @@ def _group_add(floating[:, :] out,
433437group_add_float32 = _group_add[' float' ]
434438group_add_float64 = _group_add[' double' ]
435439
440+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
    """
    Per-group product of `values` along axis 0, written into `out`.

    Rows carrying a negative label are ignored.  `counts[g]` is bumped once
    for every row assigned to group `g` (whether or not its cells are NaN),
    while NaN cells are left out of the product itself.  A cell of `out`
    becomes NaN when fewer than `min_count` non-NaN values contributed to
    it; otherwise it holds the accumulated product (1 when nothing
    contributed, since the accumulator starts at one).

    Raises
    ------
    AssertionError
        If `values` and `labels` differ in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating cell
        ndarray[floating, ndim=2] running, seen

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    # Multiplicative identity for the accumulator, zero for the tallies.
    running = np.ones_like(out)
    seen = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp >= 0:
                counts[grp] += 1
                for col in range(ncols):
                    cell = values[row, col]

                    # NaN is the only value that compares unequal to itself
                    if cell == cell:
                        seen[grp, col] += 1
                        running[grp, col] *= cell

        for grp in range(ngroups):
            for col in range(ncols):
                if seen[grp, col] >= min_count:
                    out[grp, col] = running[grp, col]
                else:
                    out[grp, col] = NAN
485+
486+
# Publish the two entries of _group_prod selected by the ' float' and
# ' double' keys under dtype-suffixed module-level names.
487+ group_prod_float32 = _group_prod[' float' ]
488+ group_prod_float64 = _group_prod[' double' ]
489+
490+
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
               int64_t[:] counts,
               floating[:, :] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
    """
    Per-group variance of `values` along axis 0, written into `out`.

    `out` is zeroed and then used as the accumulator of squared deviations,
    updated in a single pass together with a running mean per cell.  The
    final value divides that accumulator by (n - 1), where n is the number
    of non-NaN observations in the cell; cells with fewer than two such
    observations become NaN.  Rows carrying a negative label are ignored,
    and `counts[g]` is bumped once for every row assigned to group `g`.

    `min_count` is accepted for signature parity only and must stay -1.

    Raises
    ------
    AssertionError
        If `min_count` is not -1, or `values` and `labels` differ in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n, prev_mean
        ndarray[floating, ndim=2] seen, running_mean

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    seen = np.zeros_like(out)
    running_mean = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    out[:, :] = 0.0

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp >= 0:
                counts[grp] += 1

                for col in range(ncols):
                    x = values[row, col]

                    # NaN is the only value that compares unequal to itself
                    if x == x:
                        seen[grp, col] += 1
                        prev_mean = running_mean[grp, col]
                        # Update the mean first: the deviation product below
                        # needs both the old and the new mean.
                        running_mean[grp, col] += (x - prev_mean) / seen[grp, col]
                        out[grp, col] += (x - running_mean[grp, col]) * (x - prev_mean)

        for grp in range(ngroups):
            for col in range(ncols):
                n = seen[grp, col]
                if n >= 2:
                    out[grp, col] /= (n - 1)
                else:
                    out[grp, col] = NAN
541+
542+
# Publish the two entries of _group_var selected by the ' float' and
# ' double' keys under dtype-suffixed module-level names.
543+ group_var_float32 = _group_var[' float' ]
544+ group_var_float64 = _group_var[' double' ]
545+
546+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Per-group arithmetic mean of `values` along axis 0, written into `out`.

    NaN cells are excluded from both the sum and the divisor; a cell of
    `out` with no non-NaN observation becomes NaN.  Rows carrying a
    negative label are ignored, and `counts[g]` is bumped once for every
    row assigned to group `g`.

    `min_count` is accepted for signature parity only and must stay -1.

    Raises
    ------
    AssertionError
        If `min_count` is not -1, or `values` and `labels` differ in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating cell, n
        ndarray[floating, ndim=2] totals, seen

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    totals = np.zeros_like(out)
    seen = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp >= 0:
                counts[grp] += 1
                for col in range(ncols):
                    cell = values[row, col]
                    # NaN is the only value that compares unequal to itself
                    if cell == cell:
                        seen[grp, col] += 1
                        totals[grp, col] += cell

        for grp in range(ngroups):
            for col in range(ncols):
                n = seen[grp, col]
                if n == 0:
                    out[grp, col] = NAN
                else:
                    out[grp, col] = totals[grp, col] / n
590+
591+
# Publish the two entries of _group_mean selected by the ' float' and
# ' double' keys under dtype-suffixed module-level names.
592+ group_mean_float32 = _group_mean[' float' ]
593+ group_mean_float64 = _group_mean[' double' ]
594+
595+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Per-group open/high/low/close of a single column, along axis 0.

    For each group, the four columns of `out` receive, in order: the first
    non-NaN value seen, the largest, the smallest, and the last non-NaN
    value seen.  Groups without any non-NaN value keep NaN in all four
    columns.  `counts[g]` is bumped once for every row assigned to group
    `g`, NaN rows included.  Rows carrying any negative label are ignored.

    When `labels` is empty the function returns immediately: `out` is left
    untouched (not NaN-filled) and the shape checks below are not run.

    `min_count` is accepted for signature parity only and must stay -1.

    Raises
    ------
    AssertionError
        If `min_count` is not -1.
    ValueError
        If `out` does not have exactly 4 columns.
    NotImplementedError
        If `values` has more than one column.
    """
    cdef:
        Py_ssize_t i, N, K, lab
        floating val

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(labels) == 0:
        return

    N, K = (<object>values).shape

    if out.shape[1] != 4:
        raise ValueError(' Output array must have 4 columns')

    if K > 1:
        raise NotImplementedError(" Argument 'values' must have only "
                                  " one dimension")

    # NaN in the "open" column doubles as the "group not started" marker
    # tested inside the loop.
    out[:] = np.nan

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Bounds checking and wraparound are both disabled, so a
            # negative label would index outside `counts` and `out`.
            # Skip every negative label, not just -1.
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i, 0]
            # NaN is the only value that compares unequal to itself
            if val != val:
                continue

            if out[lab, 0] != out[lab, 0]:
                # First valid observation seeds all four columns.
                out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
            else:
                out[lab, 1] = max(out[lab, 1], val)
                out[lab, 2] = min(out[lab, 2], val)
                out[lab, 3] = val
643+
644+
# Publish the two entries of _group_ohlc selected by the ' float' and
# ' double' keys under dtype-suffixed module-level names.
645+ group_ohlc_float32 = _group_ohlc[' float' ]
646+ group_ohlc_float64 = _group_ohlc[' double' ]
647+
436648# generated from template
437649include " groupby_helper.pxi"
0 commit comments