Skip to content

Commit

Permalink
Fix to allow operations on different dataframe with test case
Browse files Browse the repository at this point in the history
  • Loading branch information
itholic committed Oct 11, 2019
1 parent 2c359d6 commit ddf9061
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 6 deletions.
18 changes: 12 additions & 6 deletions databricks/koalas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,13 +507,19 @@ def __setitem__(self, key, value):
if isinstance(cols_sel, str):
cols_sel = [cols_sel]
kdf = self._kdf
sdf = kdf._sdf
for col_sel in cols_sel:
sdf = sdf.withColumn(
col_sel,
(F.when(F.col(kdf._internal.index_columns[0]).isin(rows_sel), value)
.otherwise(F.col(col_sel))))
self._kdf._internal = self._kdf._internal.copy(sdf=sdf)
# Uses `kdf` to allow operations on different DataFrames.
# TODO: avoid temp column name or declare `__` prefix is
# reserved for Koalas' internal columns.
kdf["__indexing_temp_col__"] = value
new_col = kdf["__indexing_temp_col__"]._scol
kdf[col_sel] = Series(kdf[col_sel]._internal.copy(
scol=F.when(
kdf._internal.index_scols[0].isin(rows_sel), new_col
).otherwise(kdf[col_sel]._scol)), anchor=kdf)
kdf = kdf.drop(labels=['__indexing_temp_col__'])

self._kdf._internal = kdf._internal.copy()
else:
raise SparkPandasNotImplementedError(
description="""Can only assign value to the whole dataframe, the row index
Expand Down
8 changes: 8 additions & 0 deletions databricks/koalas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,14 @@ def test_setitem(self):
with self.assertRaisesRegex(ValueError,
'Only a dataframe with one column can be assigned'):
kdf.loc[:, 'max_speed'] = kdf
with self.assertRaisesRegex(ValueError,
'only column names or list of column names can be assigned'):
kdf.loc[['viper'], ('max_speed', 'shield')] = 10
msg = """Can only assign value to the whole dataframe, the row index
has to be `slice(None)` or `:`"""
msg = ("Can only assign value to the whole dataframe, the row index")
with self.assertRaisesRegex(SparkPandasNotImplementedError, msg):
kdf.loc['viper', 'max_speed'] = 10

pdf = pd.DataFrame([[1], [4], [7]],
index=['cobra', 'viper', 'sidewinder'],
Expand Down
24 changes: 24 additions & 0 deletions databricks/koalas/tests/test_ops_on_diff_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,19 @@ def test_multi_index_assignment_frame(self):

self.assert_eq(kdf.sort_index(), pdf.sort_index())

def test_loc_setitem(self):
    """Assign a Series from a *different* Koalas frame through ``.loc``.

    Two Koalas DataFrames are built from the same pandas data. The
    ``max_speed`` column of the second one is written into the ``shield``
    column of the first, restricted to the 'viper' and 'sidewinder' rows.
    The same assignment is applied to the pandas frame, and both results
    are compared after sorting by index.

    NOTE(review): presumably the enclosing test class enables operations
    on different frames; that setup is outside this method -- confirm.
    """
    expected = pd.DataFrame(
        [[1, 2], [4, 5], [7, 8]],
        index=['cobra', 'viper', 'sidewinder'],
        columns=['max_speed', 'shield'])
    # Both Koalas frames are created before the pandas frame is mutated.
    actual = ks.DataFrame(expected)
    other = ks.DataFrame(expected)

    rows = ['viper', 'sidewinder']
    cols = ['shield']

    actual.loc[rows, cols] = other.max_speed
    expected.loc[rows, cols] = expected.max_speed

    self.assert_eq(actual.sort_index(), expected.sort_index())


class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils):

Expand Down Expand Up @@ -447,3 +460,14 @@ def test_assignment(self):
with self.assertRaisesRegex(ValueError, "Cannot combine column argument"):
kdf = ks.from_pandas(self.pdf1)
kdf['c'] = self.kdf1.a

def test_loc_setitem(self):
    """Expect ``.loc`` assignment from a different frame to be rejected.

    Two Koalas DataFrames are built from the same pandas data. Writing the
    second frame's ``max_speed`` column into the first frame's ``shield``
    column (rows 'viper' and 'sidewinder') must raise ``ValueError`` whose
    message matches "Cannot combine column argument".

    NOTE(review): presumably this runs with operations on different
    frames disabled; that configuration is outside this method -- confirm.
    """
    source = pd.DataFrame(
        [[1, 2], [4, 5], [7, 8]],
        index=['cobra', 'viper', 'sidewinder'],
        columns=['max_speed', 'shield'])
    target = ks.DataFrame(source)
    other = ks.DataFrame(source)

    rows = ['viper', 'sidewinder']
    cols = ['shield']

    with self.assertRaisesRegex(ValueError, "Cannot combine column argument"):
        target.loc[rows, cols] = other.max_speed

0 comments on commit ddf9061

Please sign in to comment.