Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Series.update #923

Merged
merged 11 commits into from
Nov 7, 2019
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ class _MissingPandasLikeSeries(object):
tz_convert = unsupported_function('tz_convert')
tz_localize = unsupported_function('tz_localize')
unstack = unsupported_function('unstack')
update = unsupported_function('update')
view = unsupported_function('view')
where = unsupported_function('where')
xs = unsupported_function('xs')
Expand Down
86 changes: 85 additions & 1 deletion databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from databricks.koalas.internal import IndexMap, _InternalFrame, SPARK_INDEX_NAME_FORMAT
from databricks.koalas.missing.series import _MissingPandasLikeSeries
from databricks.koalas.plot import KoalasSeriesPlotMethods
from databricks.koalas.utils import validate_arguments_and_invoke_function, scol_for
from databricks.koalas.utils import validate_arguments_and_invoke_function, scol_for, combine_frames
from databricks.koalas.datetimes import DatetimeMethods
from databricks.koalas.strings import StringMethods

Expand Down Expand Up @@ -3409,6 +3409,90 @@ def replace(self, to_replace=None, value=None, regex=False) -> 'Series':

return self._with_new_scol(current)

def update(self, other):
    """
    Overwrite this Series in place with the non-null values of ``other``.

    Values are matched by index label. Labels that exist only in ``other``
    are ignored, and labels of this Series that have no non-null counterpart
    in ``other`` keep their current value.

    Parameters
    ----------
    other : Series
        The Series supplying the replacement values.

    Returns
    -------
    None
        The Series is modified in place.

    Raises
    ------
    ValueError
        If ``other`` is not a Series.

    Examples
    --------
    >>> from databricks.koalas.config import set_option, reset_option
    >>> set_option("compute.ops_on_diff_frames", True)
    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6]))
    >>> s
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series(['a', 'b', 'c'])
    >>> s.update(ks.Series(['d', 'e'], index=[0, 2]))
    >>> s
    0    d
    1    b
    2    e
    Name: 0, dtype: object

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, 5, 6, 7, 8]))
    >>> s
    0    4
    1    5
    2    6
    Name: 0, dtype: int64

    >>> s = ks.Series([1, 2, 3], index=[10, 11, 12])
    >>> s
    10    1
    11    2
    12    3
    Name: 0, dtype: int64

    >>> s.update(ks.Series([4, 5, 6]))
    >>> s
    10    1
    11    2
    12    3
    Name: 0, dtype: int64

    >>> s.update(ks.Series([4, 5, 6], index=[11, 12, 13]))
    >>> s
    10    1
    11    4
    12    5
    Name: 0, dtype: int64

    If ``other`` contains NaNs the corresponding values are not updated
    in the original Series.

    >>> s = ks.Series([1, 2, 3])
    >>> s.update(ks.Series([4, np.nan, 6]))
    >>> s
    0    4.0
    1    2.0
    2    6.0
    Name: 0, dtype: float64

    >>> reset_option("compute.ops_on_diff_frames")
    """
    if not isinstance(other, Series):
        raise ValueError("'other' must be a Series")

    # Only the first index column is used below.
    # NOTE(review): a multi-level index would presumably lose its other levels - confirm.
    index_col = self._index_map[0][0]

    # 'leftouter' is requested so that every row of this Series survives the join;
    # the result is then ordered by the index column.
    joined = combine_frames(self._kdf, other._kdf, how='leftouter')
    joined_sdf = joined._sdf.sort(index_col)

    # NOTE(review): these column names are hard-coded; presumably combine_frames
    # derives them from the data column names, so a Series whose column is not
    # named '0' may not resolve here - verify against combine_frames.
    this_scol = joined_sdf['__this_0']
    that_scol = joined_sdf['__that_0']

    # Take the value from ``other`` when it is present, otherwise keep our own.
    updated_scol = F.when(that_scol.isNotNull(), that_scol)
    updated_scol = updated_scol.otherwise(this_scol)
    updated_scol = updated_scol.alias(self.name)

    new_internal = _InternalFrame(
        sdf=joined_sdf.select(index_col, updated_scol),
        index_map=self._index_map,
    )
    # Rebind the internal frame so the change is visible through this object.
    self._internal = _col(ks.DataFrame(new_internal))._internal
itholic marked this conversation as resolved.
Show resolved Hide resolved

def _cum(self, func, skipna, part_cols=()):
# This is used to cummin, cummax, cumsum, etc.
index_columns = self._internal.index_columns
Expand Down
8 changes: 8 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,3 +728,11 @@ def test_duplicates(self):

self.assert_eq(pser.drop_duplicates().sort_values(),
kser.drop_duplicates().sort_values())

def test_update(self):
    """Series.update must raise ValueError when given something that is not a Series."""
    kser = ks.Series(pd.Series([10, 20, 15, 30, 45], name='x'))

    # A plain scalar is not a Series, so the call has to be rejected.
    with self.assertRaisesRegex(ValueError, "'other' must be a Series"):
        kser.update(10)
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ Combining / joining / merging

Series.append
Series.replace
Series.update

Time series-related
-------------------
Expand Down