Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Series.update #923

Merged
merged 11 commits into from
Nov 7, 2019
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ class _MissingPandasLikeSeries(object):
tz_convert = unsupported_function('tz_convert')
tz_localize = unsupported_function('tz_localize')
unstack = unsupported_function('unstack')
update = unsupported_function('update')
view = unsupported_function('view')
where = unsupported_function('where')
xs = unsupported_function('xs')
Expand Down
60 changes: 60 additions & 0 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3409,6 +3409,66 @@ def replace(self, to_replace=None, value=None, regex=False) -> 'Series':

return self._with_new_scol(current)

def update(self, other):
    """
    Modify Series in place using non-NA values from passed Series. Aligns on index.

    Only index labels already present in this Series are kept: labels that
    exist solely in ``other`` are ignored, so the length of this Series
    never changes.

    Parameters
    ----------
    other : Series

    Raises
    ------
    ValueError
        If ``other`` is not a Series.

    Examples
    --------
    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6]))
    >>> s
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series(['a', 'b', 'c'])
    >>> s.update(ks.Series(['d', 'e'], index=[0, 2]))
    >>> s
    0    d
    1    b
    2    e
    Name: 0, dtype: object

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6, 7, 8]))
    >>> s
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    If ``other`` contains NaNs the corresponding values are not updated
    in the original Series.

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, np.nan, 6]))
    >>> s
    0    4.0
    1    2.0
    2    6.0
    Name: 0, dtype: float64
    """
    if not isinstance(other, Series):
        raise ValueError("'other' must be a Series")

    self_sdf = self._internal.sdf
    other_sdf = other._internal.sdf
    # Temporary column holding the values of `other`, so they can be told
    # apart from this Series' own values after the join.
    temp_col = self.name + "_"
    # Name of the (first) index column; used as the join key.
    temp_idx = self._index_map[0][0]

    other_temp = other_sdf.withColumn(temp_col, other_sdf[other.name])
    # Left join with `self` on the left: every row of this Series is kept and
    # rows of `other` whose label is absent from this Series are discarded.
    # This replaces truncating `other` with `limit(len(self))` before an outer
    # join, which kept an arbitrary subset of `other` rather than the rows
    # that match on index, and could still append unmatched labels.
    new_sdf = self_sdf.join(other_temp, temp_idx, 'left').sort(temp_idx)
    # Take the value from `other` where it is non-null, else keep the original.
    cond = F.when(other_temp[temp_col].isNotNull(), other_temp[temp_col]) \
        .otherwise(self._scol) \
        .alias(self.name)
    # NOTE(review): only the value column is selected here, so the rebuilt
    # frame presumably receives a fresh default index in the sorted order
    # above -- confirm this is acceptable for non-default indexes.
    self._internal = _col(ks.DataFrame(_InternalFrame(sdf=new_sdf.select(cond))))._internal

def _cum(self, func, skipna, part_cols=()):
# This is used to cummin, cummax, cumsum, etc.
index_columns = self._internal.index_columns
Expand Down
8 changes: 8 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,3 +728,11 @@ def test_duplicates(self):

self.assert_eq(pser.drop_duplicates().sort_values(),
kser.drop_duplicates().sort_values())

def test_update(self):
    """Series.update must reject an argument that is not a Series."""
    kser = ks.Series(pd.Series([10, 20, 15, 30, 45], name='x'))

    # A bare scalar is not a Series, so update() is expected to raise.
    self.assertRaisesRegex(ValueError, "'other' must be a Series", kser.update, 10)
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ Combining / joining / merging

Series.append
Series.replace
Series.update

Time series-related
-------------------
Expand Down