databricks · HyukjinKwon · Nov 7, 2019 · Oct 14, 2019 · Oct 14, 2019 · Oct 14, 2019
diff --git a/databricks/koalas/missing/series.py b/databricks/koalas/missing/series.py
@@ -111,7 +111,6 @@ class _MissingPandasLikeSeries(object):
     tz_convert = unsupported_function('tz_convert')
     tz_localize = unsupported_function('tz_localize')
     unstack = unsupported_function('unstack')
-    update = unsupported_function('update')
     view = unsupported_function('view')
     where = unsupported_function('where')
     xs = unsupported_function('xs')

diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py
@@ -42,7 +42,7 @@
 from databricks.koalas.internal import IndexMap, _InternalFrame, SPARK_INDEX_NAME_FORMAT
 from databricks.koalas.missing.series import _MissingPandasLikeSeries
 from databricks.koalas.plot import KoalasSeriesPlotMethods
-from databricks.koalas.utils import validate_arguments_and_invoke_function, scol_for
+from databricks.koalas.utils import validate_arguments_and_invoke_function, scol_for, combine_frames
 from databricks.koalas.datetimes import DatetimeMethods
 from databricks.koalas.strings import StringMethods
 
@@ -3405,6 +3405,95 @@ def replace(self, to_replace=None, value=None, regex=False) -> 'Series':
 
         return self._with_new_scol(current)
 
+    def update(self, other):
+        """
+        Update this Series in place using the non-NA values of ``other``, matched by index.
+
+        Parameters
+        ----------
+        other : Series
+
+        Examples
+        --------
+        >>> from databricks.koalas.config import set_option, reset_option
+        >>> set_option("compute.ops_on_diff_frames", True)
+        >>> s = ks.Series([1, 2, 3])
+        >>> s.update(ks.Series([4, 5, 6]))
+        >>> s
+        0    4
+        1    5
+        2    6
+        Name: 0, dtype: int64
+
+        >>> s = ks.Series(['a', 'b', 'c'])
+        >>> s.update(ks.Series(['d', 'e'], index=[0, 2]))
+        >>> s
+        0    d
+        1    b
+        2    e
+        Name: 0, dtype: object
+
+        >>> s = ks.Series([1, 2, 3])
+        >>> s.update(ks.Series([4, 5, 6, 7, 8]))
+        >>> s
+        0    4
+        1    5
+        2    6
+        Name: 0, dtype: int64
+
+        >>> s = ks.Series([1, 2, 3], index=[10, 11, 12])
+        >>> s
+        10    1
+        11    2
+        12    3
+        Name: 0, dtype: int64
+
+        >>> s.update(ks.Series([4, 5, 6]))
+        >>> s
+        10    1
+        12    3
+        11    2
+        Name: 0, dtype: int64
+
+        >>> s.update(ks.Series([4, 5, 6], index=[11, 12, 13]))
+        >>> s
+        10    1
+        12    5
+        11    4
+        Name: 0, dtype: int64
+
+        NaN entries in ``other`` never overwrite anything: the value already stored
+        under that index label in this Series is kept.
+
+        >>> s = ks.Series([1, 2, 3])
+        >>> s.update(ks.Series([4, np.nan, 6]))
+        >>> s
+        0    4.0
+        1    2.0
+        2    6.0
+        Name: 0, dtype: float64
+
+        >>> reset_option("compute.ops_on_diff_frames")
+        """
+        if not isinstance(other, Series):
+            raise ValueError("'other' must be a Series")
+
+        own_name = str(self.name)
+        own_index_map = self._internal.index_map
+        joined_sdf = combine_frames(self.to_frame(), other.to_frame(), how='leftouter')._sdf
+        # The joined frame is read back through its ``__this_``/``__that_``-prefixed columns.
+        incoming = joined_sdf["__that_" + str(other.name)]
+        existing = joined_sdf["__this_" + own_name]
+        merged = F.when(incoming.isNotNull(), incoming).otherwise(existing).alias(own_name)
+        index_columns = [entry[0] for entry in own_index_map]
+        refreshed = _col(ks.DataFrame(_InternalFrame(
+            sdf=joined_sdf.select(index_columns + [merged]),
+            index_map=own_index_map,
+            column_index=self._internal.column_index)))
+        # Rebind this Series to the freshly built frame; nothing is returned.
+        self._internal = refreshed._internal
+        self._kdf = refreshed._kdf
+
     def _cum(self, func, skipna, part_cols=()):
         # This is used to cummin, cummax, cumsum, etc.
         index_columns = self._internal.index_columns

diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py
@@ -728,3 +728,11 @@ def test_duplicates(self):
 
         self.assert_eq(pser.drop_duplicates().sort_values(),
                        kser.drop_duplicates().sort_values())
+
+    def test_update(self):
+        # Passing something that is not a Series to update() must raise ValueError.
+        kser = ks.Series(pd.Series([10, 20, 15, 30, 45], name='x'))
+
+        # assertRaisesRegex treats the expected message as a regular expression.
+        with self.assertRaisesRegex(ValueError, "'other' must be a Series"):
+            kser.update(10)
diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst
@@ -176,6 +176,7 @@ Combining / joining / merging
 
    Series.append
    Series.replace
+   Series.update
 
 Time series-related
 -------------------