@@ -382,6 +382,10 @@ def group_any_all(uint8_t[:] out,
382382 if values[i] == flag_val:
383383 out[lab] = flag_val
384384
385+ # ----------------------------------------------------------------------
386+ # group_add, group_prod, group_var, group_mean, group_ohlc
387+ # ----------------------------------------------------------------------
388+
385389
386390@ cython.wraparound (False )
387391@ cython.boundscheck (False )
@@ -396,9 +400,9 @@ def _group_add(floating[:, :] out,
396400 cdef:
397401 Py_ssize_t i, j, N, K, lab, ncounts = len (counts)
398402 floating val, count
399- ndarray[ floating, ndim = 2 ] sumx, nobs
403+ floating[:, : ] sumx, nobs
400404
401- if not len (values) = = len (labels):
405+ if len (values) ! = len (labels):
402406 raise AssertionError (" len(index) != len(labels)" )
403407
404408 nobs = np.zeros_like(out)
@@ -407,7 +411,6 @@ def _group_add(floating[:, :] out,
407411 N, K = (< object > values).shape
408412
409413 with nogil:
410-
411414 for i in range (N):
412415 lab = labels[i]
413416 if lab < 0 :
@@ -433,5 +436,213 @@ def _group_add(floating[:, :] out,
# Module-level aliases: each name is bound to the result of indexing
# `_group_add` with a C type name -- presumably selecting the float32 /
# float64 specialization of the fused `floating` signature (TODO confirm).
# NOTE(review): the key strings carry a leading space as shown here; verify
# against the real file that this is not an extraction artifact.
433436group_add_float32 = _group_add[' float' ]
434437group_add_float64 = _group_add[' double' ]
435438
439+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
    """
    Only aggregates on axis=0

    Compute, per group and per column, the product of the non-NaN entries
    of ``values``.

    Parameters
    ----------
    out : 2-D floating memoryview
        Written in place.  ``out[g, j]`` receives the product for group
        ``g`` and column ``j``, or NAN when fewer than ``min_count``
        non-NaN values were seen for that cell.
    counts : 1-D int64 memoryview
        Incremented in place once for every row whose label is
        non-negative (NaN entries do not affect this count).  Its length
        determines how many rows of ``out`` are filled.
    values : 2-D floating memoryview
        Input data; row ``i`` belongs to group ``labels[i]``.
    labels : 1-D const int64 memoryview
        Group index per row of ``values``.  Negative labels are skipped.
    min_count : Py_ssize_t, default 0
        Minimum number of non-NaN observations required for a non-NAN
        result.

    Raises
    ------
    AssertionError
        If ``values`` and ``labels`` differ in length.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val
        floating[:, :] prodx, nobs

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    # per-(group, column) count of non-NaN observations
    nobs = np.zeros_like(out)
    # running products start at the multiplicative identity
    prodx = np.ones_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                # not nan
                if val == val:
                    nobs[lab, j] += 1
                    prodx[lab, j] *= val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = NAN
                else:
                    out[i, j] = prodx[i, j]
484+
485+
# Module-level aliases: each name is bound to the result of indexing
# `_group_prod` with a C type name -- presumably selecting the float32 /
# float64 specialization of the fused `floating` signature (TODO confirm).
# NOTE(review): the key strings carry a leading space as shown here; verify
# against the real file that this is not an extraction artifact.
486+ group_prod_float32 = _group_prod[' float' ]
487+ group_prod_float64 = _group_prod[' double' ]
488+
489+
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
               int64_t[:] counts,
               floating[:, :] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
    """
    Per-group, per-column variance of the non-NaN entries of ``values``.

    ``out`` is zeroed and then used as the accumulator for the sum of
    squared deviations, updated in a single pass together with a running
    mean (Welford-style update).  Each cell is finally divided by
    ``n - 1`` where ``n`` is the number of non-NaN observations; cells
    with fewer than two observations are set to NAN.

    ``counts`` is incremented in place once per row with a non-negative
    label.  Rows with a negative label are ignored.  ``min_count`` must be
    left at -1.

    Raises AssertionError if ``min_count`` is not -1 or if ``values`` and
    ``labels`` differ in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n_valid, prev_mean
        floating[:, :] seen, running_mean

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    seen = np.zeros_like(out)
    running_mean = np.zeros_like(out)

    nrows = values.shape[0]
    ncols = values.shape[1]

    out[:, :] = 0.0

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp < 0:
                continue

            counts[grp] += 1

            for col in range(ncols):
                x = values[row, col]

                # NaN is the only value that is unequal to itself
                if x != x:
                    continue

                seen[grp, col] += 1
                prev_mean = running_mean[grp, col]
                running_mean[grp, col] += (x - prev_mean) / seen[grp, col]
                # uses both the updated and the previous mean
                out[grp, col] += ((x - running_mean[grp, col])
                                  * (x - prev_mean))

        for row in range(ngroups):
            for col in range(ncols):
                n_valid = seen[row, col]
                if n_valid < 2:
                    out[row, col] = NAN
                else:
                    out[row, col] /= (n_valid - 1)
540+
541+
# Module-level aliases: each name is bound to the result of indexing
# `_group_var` with a C type name -- presumably selecting the float32 /
# float64 specialization of the fused `floating` signature (TODO confirm).
# NOTE(review): the key strings carry a leading space as shown here; verify
# against the real file that this is not an extraction artifact.
542+ group_var_float32 = _group_var[' float' ]
543+ group_var_float64 = _group_var[' double' ]
544+
545+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Per-group, per-column mean of the non-NaN entries of ``values``.

    ``out[g, j]`` is set to the sum of the non-NaN values of column ``j``
    in group ``g`` divided by how many there were, or to NAN when there
    were none.  ``counts`` is incremented in place once per row with a
    non-negative label; rows with a negative label are ignored.
    ``min_count`` must be left at -1.

    Raises AssertionError if ``min_count`` is not -1 or if ``values`` and
    ``labels`` differ in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n_valid
        floating[:, :] totals, valid

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    valid = np.zeros_like(out)
    totals = np.zeros_like(out)

    nrows = values.shape[0]
    ncols = values.shape[1]

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp < 0:
                continue

            counts[grp] += 1
            for col in range(ncols):
                x = values[row, col]

                # NaN is the only value that is unequal to itself
                if x != x:
                    continue

                valid[grp, col] += 1
                totals[grp, col] += x

        for row in range(ngroups):
            for col in range(ncols):
                n_valid = valid[row, col]
                if n_valid == 0:
                    out[row, col] = NAN
                else:
                    out[row, col] = totals[row, col] / n_valid
589+
590+
# Module-level aliases: each name is bound to the result of indexing
# `_group_mean` with a C type name -- presumably selecting the float32 /
# float64 specialization of the fused `floating` signature (TODO confirm).
# NOTE(review): the key strings carry a leading space as shown here; verify
# against the real file that this is not an extraction artifact.
591+ group_mean_float32 = _group_mean[' float' ]
592+ group_mean_float64 = _group_mean[' double' ]
593+
594+
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Only aggregates on axis=0

    Fill ``out`` with four per-group statistics of the single column of
    ``values``, taken over non-NaN entries in row order:
    column 0 = first value, column 1 = maximum, column 2 = minimum,
    column 3 = last value.

    Parameters
    ----------
    out : 2-D floating memoryview
        Written in place; must have exactly 4 columns.  Rows for groups
        with no non-NaN value stay NaN.
    counts : 1-D int64 memoryview
        Incremented in place once per row with a non-negative label
        (including rows whose value is NaN).
    values : 2-D floating memoryview
        Input data; only a single column is supported.
    labels : 1-D const int64 memoryview
        Group index per row of ``values``.  Negative labels are skipped.
    min_count : Py_ssize_t, default -1
        Must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1.
    ValueError
        If ``out`` does not have 4 columns.
    NotImplementedError
        If ``values`` has more than one column.

    Notes
    -----
    When ``labels`` is empty the function returns immediately, before any
    validation and without touching ``out``.
    """
    cdef:
        Py_ssize_t i, N, K, lab
        floating val

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(labels) == 0:
        return

    N, K = (<object>values).shape

    if out.shape[1] != 4:
        raise ValueError(' Output array must have 4 columns')

    if K > 1:
        raise NotImplementedError(" Argument 'values' must have only "
                                  " one dimension")
    out[:] = np.nan

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Skip every negative label, not just -1: with bounds checking
            # and wraparound disabled a negative index would access memory
            # outside `counts` / `out`.
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i, 0]
            # NaN is the only value that is unequal to itself
            if val != val:
                continue

            if out[lab, 0] != out[lab, 0]:
                # first non-NaN value for this group seeds all four slots
                out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
            else:
                out[lab, 1] = max(out[lab, 1], val)
                out[lab, 2] = min(out[lab, 2], val)
                out[lab, 3] = val
642+
643+
# Module-level aliases: each name is bound to the result of indexing
# `_group_ohlc` with a C type name -- presumably selecting the float32 /
# float64 specialization of the fused `floating` signature (TODO confirm).
# NOTE(review): the key strings carry a leading space as shown here; verify
# against the real file that this is not an extraction artifact.
644+ group_ohlc_float32 = _group_ohlc[' float' ]
645+ group_ohlc_float64 = _group_ohlc[' double' ]
646+
436647# generated from template
437648include " groupby_helper.pxi"
0 commit comments