Skip to content

Commit 20f5be6

Browse files
itholic authored and HyukjinKwon committed
Implement Series.update (#923)
Implemented the `update` function for Series, like pandas Series.update (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.update.html#pandas.Series.update).

```python
>>> s = ks.Series([1, 2, 3])
>>> s.update(ks.Series([4, 5, 6]))
>>> s
0    4
1    5
2    6
Name: 0, dtype: int64

>>> s = ks.Series(['a', 'b', 'c'])
>>> s.update(ks.Series(['d', 'e'], index=[0, 2]))
>>> s
0    d
1    b
2    e
Name: 0, dtype: object

>>> s = ks.Series([1, 2, 3])
>>> s.update(ks.Series([4, 5, 6, 7, 8]))
>>> s
0    4
1    5
2    6
Name: 0, dtype: int64

>>> s = ks.Series([1, 2, 3])
>>> s.update(ks.Series([4, np.nan, 6]))
>>> s
0    4.0
1    2.0
2    6.0
Name: 0, dtype: float64
```
1 parent 2c4fe3c commit 20f5be6

File tree

4 files changed

+102
-2
lines changed

4 files changed

+102
-2
lines changed

databricks/koalas/missing/series.py

-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ class _MissingPandasLikeSeries(object):
106106
tz_convert = unsupported_function('tz_convert')
107107
tz_localize = unsupported_function('tz_localize')
108108
unstack = unsupported_function('unstack')
109-
update = unsupported_function('update')
110109
view = unsupported_function('view')
111110

112111
# Deprecated functions

databricks/koalas/series.py

+93-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from databricks.koalas.missing.series import _MissingPandasLikeSeries
4444
from databricks.koalas.plot import KoalasSeriesPlotMethods
4545
from databricks.koalas.utils import (validate_arguments_and_invoke_function, scol_for,
46-
name_like_string)
46+
combine_frames, name_like_string)
4747
from databricks.koalas.datetimes import DatetimeMethods
4848
from databricks.koalas.strings import StringMethods
4949

@@ -3725,6 +3725,98 @@ def replace(self, to_replace=None, value=None, regex=False) -> 'Series':
37253725

37263726
return self._with_new_scol(current)
37273727

3728+
def update(self, other):
    """
    Modify Series in place using non-NA values from passed Series. Aligns on index.

    Only index labels already present in this Series are kept; labels that
    exist only in ``other`` are ignored. Nothing is returned.

    Parameters
    ----------
    other : Series

    Raises
    ------
    ValueError
        If ``other`` is not a Series.

    Examples
    --------
    >>> from databricks.koalas.config import set_option, reset_option
    >>> set_option("compute.ops_on_diff_frames", True)
    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6]))
    >>> s.sort_index()
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series(['a', 'b', 'c'])
    >>> s.update(ks.Series(['d', 'e'], index=[0, 2]))
    >>> s.sort_index()
    0    d
    1    b
    2    e
    Name: 0, dtype: object

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6, 7, 8]))
    >>> s.sort_index()
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series([1, 2, 3], index=[10, 11, 12])
    >>> s
    10    1
    11    2
    12    3
    Name: 0, dtype: int64

    >>> s.update(ks.Series([4, 5, 6]))
    >>> s.sort_index()
    10    1
    11    2
    12    3
    Name: 0, dtype: int64

    >>> s.update(ks.Series([4, 5, 6], index=[11, 12, 13]))
    >>> s.sort_index()
    10    1
    11    4
    12    5
    Name: 0, dtype: int64

    If ``other`` contains NaNs the corresponding values are not updated
    in the original Series.

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, np.nan, 6]))
    >>> s.sort_index()
    0    4.0
    1    2.0
    2    6.0
    Name: 0, dtype: float64

    >>> reset_option("compute.ops_on_diff_frames")
    """
    if not isinstance(other, Series):
        raise ValueError("'other' must be a Series")

    index_scol_names = [index_map[0] for index_map in self._internal.index_map]

    # Left outer join: every row of this Series is kept, and rows of `other`
    # whose index label does not occur here are dropped.
    combined = combine_frames(self.to_frame(), other.to_frame(), how='leftouter')
    combined_sdf = combined._sdf

    # Each side's column name must be resolved through its OWN internal frame;
    # looking up `other`'s column index in `self._internal` only works when both
    # Series happen to share the same name.
    this_name = self._internal.column_name_for(self._internal.column_index[0])
    that_name = other._internal.column_name_for(other._internal.column_index[0])

    # The combined frame is expected to expose the two inputs under
    # `__this_`/`__that_`-prefixed column names.
    this_scol = scol_for(combined_sdf, "__this_%s" % str(this_name))
    that_scol = scol_for(combined_sdf, "__that_%s" % str(that_name))

    # Take the value from `other` when it is non-null, otherwise keep ours;
    # the result is aliased back to this Series' original column name.
    cond = F.when(that_scol.isNotNull(), that_scol) \
        .otherwise(this_scol) \
        .alias(str(this_name))

    internal = _InternalFrame(
        sdf=combined_sdf.select(index_scol_names + [cond]),
        index_map=self._internal.index_map,
        column_index=self._internal.column_index)
    self_updated = _col(ks.DataFrame(internal))

    # In-place semantics: rebind this Series to the freshly computed frame.
    self._internal = self_updated._internal
    self._kdf = self_updated._kdf
3819+
37283820
def where(self, cond, other=np.nan):
37293821
"""
37303822
Replace values where the condition is False.

databricks/koalas/tests/test_series.py

+8
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,14 @@ def test_duplicates(self):
767767
self.assert_eq(pser.drop_duplicates().sort_values(),
768768
kser.drop_duplicates().sort_values())
769769

770+
def test_update(self):
    # Only argument validation is exercised here: passing anything other
    # than a Series as `other` must be rejected with a ValueError.
    kser = ks.Series(pd.Series([10, 20, 15, 30, 45], name='x'))

    with self.assertRaisesRegex(ValueError, "'other' must be a Series"):
        kser.update(10)
777+
770778
def test_where(self):
771779
pser1 = pd.Series([0, 1, 2, 3, 4], name=0)
772780
pser2 = pd.Series([100, 200, 300, 400, 500], name=0)

docs/source/reference/series.rst

+1
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ Combining / joining / merging
182182

183183
Series.append
184184
Series.replace
185+
Series.update
185186

186187
Time series-related
187188
-------------------

0 commit comments

Comments
 (0)