|
43 | 43 | from databricks.koalas.missing.series import _MissingPandasLikeSeries
|
44 | 44 | from databricks.koalas.plot import KoalasSeriesPlotMethods
|
45 | 45 | from databricks.koalas.utils import (validate_arguments_and_invoke_function, scol_for,
|
46 |
| - name_like_string) |
| 46 | + combine_frames, name_like_string) |
47 | 47 | from databricks.koalas.datetimes import DatetimeMethods
|
48 | 48 | from databricks.koalas.strings import StringMethods
|
49 | 49 |
|
@@ -3725,6 +3725,98 @@ def replace(self, to_replace=None, value=None, regex=False) -> 'Series':
|
3725 | 3725 |
|
3726 | 3726 | return self._with_new_scol(current)
|
3727 | 3727 |
|
def update(self, other):
    """
    Modify Series in place using non-NA values from passed Series. Aligns on index.

    Rows of ``other`` whose index is not present in this Series are ignored,
    and null values in ``other`` leave the corresponding value untouched.

    Parameters
    ----------
    other : Series
        Series providing the replacement values.

    Returns
    -------
    None
        This Series is updated in place.

    Raises
    ------
    ValueError
        If ``other`` is not a Series.

    Examples
    --------
    >>> from databricks.koalas.config import set_option, reset_option
    >>> set_option("compute.ops_on_diff_frames", True)
    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6]))
    >>> s.sort_index()
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series(['a', 'b', 'c'])
    >>> s.update(ks.Series(['d', 'e'], index=[0, 2]))
    >>> s.sort_index()
    0    d
    1    b
    2    e
    Name: 0, dtype: object

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6, 7, 8]))
    >>> s.sort_index()
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series([1, 2, 3], index=[10, 11, 12])
    >>> s
    10    1
    11    2
    12    3
    Name: 0, dtype: int64

    >>> s.update(ks.Series([4, 5, 6]))
    >>> s.sort_index()
    10    1
    11    2
    12    3
    Name: 0, dtype: int64

    >>> s.update(ks.Series([4, 5, 6], index=[11, 12, 13]))
    >>> s.sort_index()
    10    1
    11    4
    12    5
    Name: 0, dtype: int64

    If ``other`` contains NaNs the corresponding values are not updated
    in the original Series.

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, np.nan, 6]))
    >>> s.sort_index()
    0    4.0
    1    2.0
    2    6.0
    Name: 0, dtype: float64

    >>> reset_option("compute.ops_on_diff_frames")
    """
    if not isinstance(other, Series):
        raise ValueError("'other' must be a Series")

    index_scol_names = [index_map[0] for index_map in self._internal.index_map]

    # A left outer join keeps every row of this Series and attaches the
    # matching value of `other` (null when there is no match).
    combined = combine_frames(self.to_frame(), other.to_frame(), how='leftouter')
    combined_sdf = combined._sdf

    # Each side's data column name must be resolved against its own internal
    # frame: `other` may be named differently from `self`, in which case
    # `self._internal` has no mapping for `other`'s column label.
    this_name = str(self._internal.column_name_for(self._internal.column_index[0]))
    that_name = str(other._internal.column_name_for(other._internal.column_index[0]))

    # The combined frame exposes the two sides as `__this_<name>` / `__that_<name>`.
    this_scol = scol_for(combined_sdf, "__this_%s" % this_name)
    that_scol = scol_for(combined_sdf, "__that_%s" % that_name)

    # Take the value from `other` when it is non-null, otherwise keep ours;
    # the result keeps this Series' original column name.
    cond = F.when(that_scol.isNotNull(), that_scol) \
        .otherwise(this_scol) \
        .alias(this_name)

    internal = _InternalFrame(
        sdf=combined_sdf.select(index_scol_names + [cond]),
        index_map=self._internal.index_map,
        column_index=self._internal.column_index)
    self_updated = _col(ks.DataFrame(internal))

    # Rebind this object's backing frame so the change is visible in place.
    self._internal = self_updated._internal
    self._kdf = self_updated._kdf
| 3819 | + |
3728 | 3820 | def where(self, cond, other=np.nan):
|
3729 | 3821 | """
|
3730 | 3822 | Replace values where the condition is False.
|
|
0 commit comments