|
15 | 15 | #
|
16 | 16 | from functools import partial
|
17 | 17 | from typing import Any
|
18 |
| -from functools import reduce |
19 | 18 |
|
20 | 19 | from databricks.koalas.internal import _InternalFrame, SPARK_INDEX_NAME_FORMAT
|
21 | 20 | from databricks.koalas.utils import name_like_string
|
@@ -108,8 +107,9 @@ def count(self):
|
108 | 107 |
|
109 | 108 | See Also
|
110 | 109 | --------
|
111 |
| - Series.rolling : Calling object with Series data. |
112 |
| - DataFrame.rolling : Calling object with DataFrames. |
| 110 | + Series.expanding : Calling object with Series data. |
| 111 | + DataFrame.expanding : Calling object with DataFrames. |
| 112 | + Series.count : Count of the full Series. |
113 | 113 | DataFrame.count : Count of the full DataFrame.
|
114 | 114 |
|
115 | 115 | Examples
|
@@ -237,6 +237,7 @@ def count(self):
|
237 | 237 | --------
|
238 | 238 | Series.expanding : Calling object with Series data.
|
239 | 239 | DataFrame.expanding : Calling object with DataFrames.
|
| 240 | + Series.count : Count of the full Series. |
240 | 241 | DataFrame.count : Count of the full DataFrame.
|
241 | 242 |
|
242 | 243 | Examples
|
@@ -283,6 +284,8 @@ def sum(self):
|
283 | 284 |
|
284 | 285 | See Also
|
285 | 286 | --------
|
| 287 | + Series.expanding : Calling object with Series data. |
| 288 | + DataFrame.expanding : Calling object with DataFrames. |
286 | 289 | Series.sum : Reducing sum for Series.
|
287 | 290 | DataFrame.sum : Reducing sum for DataFrame.
|
288 | 291 |
|
@@ -391,8 +394,10 @@ def max(self):
|
391 | 394 |
|
392 | 395 | See Also
|
393 | 396 | --------
|
394 |
| - Series.expanding : Series expanding. |
395 |
| - DataFrame.expanding : DataFrame expanding. |
| 397 | + Series.expanding : Calling object with Series data. |
| 398 | + DataFrame.expanding : Calling object with DataFrames. |
| 399 | + Series.max : Similar method for Series. |
| 400 | + DataFrame.max : Similar method for DataFrame. |
396 | 401 | """
|
397 | 402 | def max(scol):
|
398 | 403 | return F.when(
|
@@ -564,37 +569,262 @@ def count(self):
|
564 | 569 | --------
|
565 | 570 | Series.expanding : Calling object with Series data.
|
566 | 571 | DataFrame.expanding : Calling object with DataFrames.
|
| 572 | + Series.count : Count of the full Series. |
567 | 573 | DataFrame.count : Count of the full DataFrame.
|
568 | 574 |
|
569 | 575 | Examples
|
570 | 576 | --------
|
571 |
| - >>> s = ks.Series([2, 3, float("nan"), 10]) |
572 |
| - >>> s.name = "col" |
573 |
| - >>> s.groupby(s).expanding().count().sort_index() # doctest: +NORMALIZE_WHITESPACE |
574 |
| - col |
575 |
| - 2.0 0 1.0 |
576 |
| - 3.0 1 1.0 |
577 |
| - 10.0 3 1.0 |
578 |
| - Name: col, dtype: float64 |
579 |
| -
|
580 |
| - >>> df = s.to_frame() |
581 |
| - >>> df.groupby(df.col).expanding().count().sort_index() # doctest: +NORMALIZE_WHITESPACE |
582 |
| - col |
583 |
| - col |
584 |
| - 2.0 0 1.0 |
585 |
| - 3.0 1 1.0 |
586 |
| - 10.0 3 1.0 |
| 577 | + >>> s = ks.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5]) |
| 578 | + >>> s.groupby(s).expanding(3).count().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 579 | + 0 |
| 580 | + 2 0 NaN |
| 581 | + 1 NaN |
| 582 | + 3 2 NaN |
| 583 | + 3 NaN |
| 584 | + 4 3.0 |
| 585 | + 4 5 NaN |
| 586 | + 6 NaN |
| 587 | + 7 3.0 |
| 588 | + 8 4.0 |
| 589 | + 5 9 NaN |
| 590 | + 10 NaN |
| 591 | + Name: 0, dtype: float64 |
| 592 | +
|
| 593 | + For DataFrame, each expanding count is computed column-wise. |
| 594 | +
|
| 595 | + >>> df = ks.DataFrame({"A": s.to_numpy(), "B": s.to_numpy() ** 2}) |
| 596 | + >>> df.groupby(df.A).expanding(2).count().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 597 | + A B |
| 598 | + A |
| 599 | + 2 0 NaN NaN |
| 600 | + 1 2.0 2.0 |
| 601 | + 3 2 NaN NaN |
| 602 | + 3 2.0 2.0 |
| 603 | + 4 3.0 3.0 |
| 604 | + 4 5 NaN NaN |
| 605 | + 6 2.0 2.0 |
| 606 | + 7 3.0 3.0 |
| 607 | + 8 4.0 4.0 |
| 608 | + 5 9 NaN NaN |
| 609 | + 10 2.0 2.0 |
587 | 610 | """
|
588 | 611 | return super(ExpandingGroupby, self).count()
|
589 | 612 |
|
590 | 613 | def sum(self):
|
591 |
| - raise NotImplementedError("groupby.expanding().sum() is currently not implemented yet.") |
| 614 | + """ |
| 615 | + Calculate expanding sum of given DataFrame or Series. |
| 616 | +
|
| 617 | + Returns |
| 618 | + ------- |
| 619 | + Series or DataFrame |
| 620 | + Same type as the input, with the same index, containing the |
| 621 | + expanding sum. |
| 622 | +
|
| 623 | + See Also |
| 624 | + -------- |
| 625 | + Series.expanding : Calling object with Series data. |
| 626 | + DataFrame.expanding : Calling object with DataFrames. |
| 627 | + Series.sum : Reducing sum for Series. |
| 628 | + DataFrame.sum : Reducing sum for DataFrame. |
| 629 | +
|
| 630 | + Examples |
| 631 | + -------- |
| 632 | + >>> s = ks.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5]) |
| 633 | + >>> s.groupby(s).expanding(3).sum().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 634 | + 0 |
| 635 | + 2 0 NaN |
| 636 | + 1 NaN |
| 637 | + 3 2 NaN |
| 638 | + 3 NaN |
| 639 | + 4 9.0 |
| 640 | + 4 5 NaN |
| 641 | + 6 NaN |
| 642 | + 7 12.0 |
| 643 | + 8 16.0 |
| 644 | + 5 9 NaN |
| 645 | + 10 NaN |
| 646 | + Name: 0, dtype: float64 |
| 647 | +
|
| 648 | + For DataFrame, each expanding sum is computed column-wise. |
| 649 | +
|
| 650 | + >>> df = ks.DataFrame({"A": s.to_numpy(), "B": s.to_numpy() ** 2}) |
| 651 | + >>> df.groupby(df.A).expanding(2).sum().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 652 | + A B |
| 653 | + A |
| 654 | + 2 0 NaN NaN |
| 655 | + 1 4.0 8.0 |
| 656 | + 3 2 NaN NaN |
| 657 | + 3 6.0 18.0 |
| 658 | + 4 9.0 27.0 |
| 659 | + 4 5 NaN NaN |
| 660 | + 6 8.0 32.0 |
| 661 | + 7 12.0 48.0 |
| 662 | + 8 16.0 64.0 |
| 663 | + 5 9 NaN NaN |
| 664 | + 10 10.0 50.0 |
| 665 | + """ |
| 666 | + return super(ExpandingGroupby, self).sum() |
592 | 667 |
|
593 | 668 | def min(self):
|
594 |
| - raise NotImplementedError("groupby.expanding().min() is currently not implemented yet.") |
| 669 | + """ |
| 670 | + Calculate the expanding minimum. |
| 671 | +
|
| 672 | + Returns |
| 673 | + ------- |
| 674 | + Series or DataFrame |
| 675 | + Returned object type is determined by the caller of the expanding |
| 676 | + calculation. |
| 677 | +
|
| 678 | + See Also |
| 679 | + -------- |
| 680 | + Series.expanding : Calling object with Series data. |
| 681 | + DataFrame.expanding : Calling object with DataFrames. |
| 682 | + Series.min : Similar method for Series. |
| 683 | + DataFrame.min : Similar method for DataFrame. |
| 684 | +
|
| 685 | + Examples |
| 686 | + -------- |
| 687 | + >>> s = ks.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5]) |
| 688 | + >>> s.groupby(s).expanding(3).min().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 689 | + 0 |
| 690 | + 2 0 NaN |
| 691 | + 1 NaN |
| 692 | + 3 2 NaN |
| 693 | + 3 NaN |
| 694 | + 4 3.0 |
| 695 | + 4 5 NaN |
| 696 | + 6 NaN |
| 697 | + 7 4.0 |
| 698 | + 8 4.0 |
| 699 | + 5 9 NaN |
| 700 | + 10 NaN |
| 701 | + Name: 0, dtype: float64 |
| 702 | +
|
| 703 | + For DataFrame, each expanding minimum is computed column-wise. |
| 704 | +
|
| 705 | + >>> df = ks.DataFrame({"A": s.to_numpy(), "B": s.to_numpy() ** 2}) |
| 706 | + >>> df.groupby(df.A).expanding(2).min().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 707 | + A B |
| 708 | + A |
| 709 | + 2 0 NaN NaN |
| 710 | + 1 2.0 4.0 |
| 711 | + 3 2 NaN NaN |
| 712 | + 3 3.0 9.0 |
| 713 | + 4 3.0 9.0 |
| 714 | + 4 5 NaN NaN |
| 715 | + 6 4.0 16.0 |
| 716 | + 7 4.0 16.0 |
| 717 | + 8 4.0 16.0 |
| 718 | + 5 9 NaN NaN |
| 719 | + 10 5.0 25.0 |
| 720 | + """ |
| 721 | + return super(ExpandingGroupby, self).min() |
595 | 722 |
|
596 | 723 | def max(self):
|
597 |
| - raise NotImplementedError("groupby.expanding().max() is currently not implemented yet.") |
| 724 | + """ |
| 725 | + Calculate the expanding maximum. |
| 726 | +
|
| 727 | + Returns |
| 728 | + ------- |
| 729 | + Series or DataFrame |
| 730 | + Return type is determined by the caller. |
| 731 | +
|
| 732 | + See Also |
| 733 | + -------- |
| 734 | + Series.expanding : Calling object with Series data. |
| 735 | + DataFrame.expanding : Calling object with DataFrames. |
| 736 | + Series.max : Similar method for Series. |
| 737 | + DataFrame.max : Similar method for DataFrame. |
| 738 | +
|
| 739 | + Examples |
| 740 | + -------- |
| 741 | + >>> s = ks.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5]) |
| 742 | + >>> s.groupby(s).expanding(3).max().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 743 | + 0 |
| 744 | + 2 0 NaN |
| 745 | + 1 NaN |
| 746 | + 3 2 NaN |
| 747 | + 3 NaN |
| 748 | + 4 3.0 |
| 749 | + 4 5 NaN |
| 750 | + 6 NaN |
| 751 | + 7 4.0 |
| 752 | + 8 4.0 |
| 753 | + 5 9 NaN |
| 754 | + 10 NaN |
| 755 | + Name: 0, dtype: float64 |
| 756 | +
|
| 757 | + For DataFrame, each expanding maximum is computed column-wise. |
| 758 | +
|
| 759 | + >>> df = ks.DataFrame({"A": s.to_numpy(), "B": s.to_numpy() ** 2}) |
| 760 | + >>> df.groupby(df.A).expanding(2).max().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 761 | + A B |
| 762 | + A |
| 763 | + 2 0 NaN NaN |
| 764 | + 1 2.0 4.0 |
| 765 | + 3 2 NaN NaN |
| 766 | + 3 3.0 9.0 |
| 767 | + 4 3.0 9.0 |
| 768 | + 4 5 NaN NaN |
| 769 | + 6 4.0 16.0 |
| 770 | + 7 4.0 16.0 |
| 771 | + 8 4.0 16.0 |
| 772 | + 5 9 NaN NaN |
| 773 | + 10 5.0 25.0 |
| 774 | + """ |
| 775 | + return super(ExpandingGroupby, self).max() |
598 | 776 |
|
599 | 777 | def mean(self):
|
600 |
| - raise NotImplementedError("groupby.expanding().mean() is currently not implemented yet.") |
| 778 | + """ |
| 779 | + Calculate the expanding mean of the values. |
| 780 | +
|
| 781 | + Returns |
| 782 | + ------- |
| 783 | + Series or DataFrame |
| 784 | + Returned object type is determined by the caller of the expanding |
| 785 | + calculation. |
| 786 | +
|
| 787 | + See Also |
| 788 | + -------- |
| 789 | + Series.expanding : Calling object with Series data. |
| 790 | + DataFrame.expanding : Calling object with DataFrames. |
| 791 | + Series.mean : Equivalent method for Series. |
| 792 | + DataFrame.mean : Equivalent method for DataFrame. |
| 793 | +
|
| 794 | + Examples |
| 795 | + -------- |
| 796 | + >>> s = ks.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5]) |
| 797 | + >>> s.groupby(s).expanding(3).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 798 | + 0 |
| 799 | + 2 0 NaN |
| 800 | + 1 NaN |
| 801 | + 3 2 NaN |
| 802 | + 3 NaN |
| 803 | + 4 3.0 |
| 804 | + 4 5 NaN |
| 805 | + 6 NaN |
| 806 | + 7 4.0 |
| 807 | + 8 4.0 |
| 808 | + 5 9 NaN |
| 809 | + 10 NaN |
| 810 | + Name: 0, dtype: float64 |
| 811 | +
|
| 812 | + For DataFrame, each expanding mean is computed column-wise. |
| 813 | +
|
| 814 | + >>> df = ks.DataFrame({"A": s.to_numpy(), "B": s.to_numpy() ** 2}) |
| 815 | + >>> df.groupby(df.A).expanding(2).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE |
| 816 | + A B |
| 817 | + A |
| 818 | + 2 0 NaN NaN |
| 819 | + 1 2.0 4.0 |
| 820 | + 3 2 NaN NaN |
| 821 | + 3 3.0 9.0 |
| 822 | + 4 3.0 9.0 |
| 823 | + 4 5 NaN NaN |
| 824 | + 6 4.0 16.0 |
| 825 | + 7 4.0 16.0 |
| 826 | + 8 4.0 16.0 |
| 827 | + 5 9 NaN NaN |
| 828 | + 10 5.0 25.0 |
| 829 | + """ |
| 830 | + return super(ExpandingGroupby, self).mean() |
0 commit comments