Skip to content

Commit

Permalink
Fix all/any. (#652)
Browse files Browse the repository at this point in the history
This PR fixes `IndexOpsMixin.all()` and `any()` so that they work on the result of chained operations, e.g.:

```py
>>> (ks.Series([1,2,3]) % 2 == 0).all()
```

Without this fix, the expression above fails with the following error:

```
Traceback (most recent call last):
  File "/Users/ueshin/workspace/databricks-koalas/miniconda/envs/databricks-koalas_3.6/lib/python3.6/site-packages/pyspark/sql/utils.py", line 63, in deco
    return f(*a, **kw)
  File "/Users/ueshin/workspace/databricks-koalas/miniconda/envs/databricks-koalas_3.6/lib/python3.6/site-packages/py4j/protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o143.select.
: org.apache.spark.sql.AnalysisException: Resolved attribute(s) 0#14L missing from ((0 % 2) = 0)#22 in operator !Aggregate [min(coalesce(cast(((0#14L % cast(2 as bigint)) = cast(0 as bigint)) as boolean), true)) AS min(coalesce(CAST(((0 % 2) = 0) AS BOOLEAN), true))#25].;;
!Aggregate [min(coalesce(cast(((0#14L % cast(2 as bigint)) = cast(0 as bigint)) as boolean), true)) AS min(coalesce(CAST(((0 % 2) = 0) AS BOOLEAN), true))#25]
+- Project [((0#14L % cast(2 as bigint)) = cast(0 as bigint)) AS ((0 % 2) = 0)#22]
   +- LogicalRDD [__index_level_0__#13L, 0#14L], false
```
  • Loading branch information
ueshin authored and HyukjinKwon committed Aug 19, 2019
1 parent eadb8a7 commit 73a2854
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
6 changes: 3 additions & 3 deletions databricks/koalas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.
from databricks.koalas.internal import _InternalFrame
from databricks.koalas.typedef import pandas_wraps
from databricks.koalas.utils import align_diff_series
from databricks.koalas.utils import align_diff_series, scol_for


def _column_op(f):
Expand Down Expand Up @@ -582,7 +582,7 @@ def all(self, axis: Union[int, str] = 0) -> bool:
raise ValueError('axis should be either 0 or "index" currently.')

sdf = self._kdf._sdf.select(self._scol)
col = self._scol
col = scol_for(sdf, sdf.columns[0])

# Note that we're ignoring `None`s here for now.
# any and every was added as of Spark 3.0
Expand Down Expand Up @@ -645,7 +645,7 @@ def any(self, axis: Union[int, str] = 0) -> bool:
raise ValueError('axis should be either 0 or "index" currently.')

sdf = self._kdf._sdf.select(self._scol)
col = self._scol
col = scol_for(sdf, sdf.columns[0])

# Note that we're ignoring `None`s here for now.
# any and every was added as of Spark 3.0
Expand Down
34 changes: 34 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,40 @@ def test_isnull(self):
self.assert_eq(ks.notnull(), ps.notnull())
self.assert_eq(ks.isnull(), ps.isnull())

def test_all(self):
for ps in [pd.Series([True, True], name='x'),
pd.Series([True, False], name='x'),
pd.Series([0, 1], name='x'),
pd.Series([1, 2, 3], name='x'),
pd.Series([True, True, None], name='x'),
pd.Series([True, False, None], name='x'),
pd.Series([], name='x'),
pd.Series([np.nan], name='x')]:
ks = koalas.from_pandas(ps)
self.assert_eq(ks.all(), ps.all())

ps = pd.Series([1, 2, 3, 4], name='x')
ks = koalas.from_pandas(ps)

self.assert_eq((ks % 2 == 0).all(), (ps % 2 == 0).all())

def test_any(self):
for ps in [pd.Series([False, False], name='x'),
pd.Series([True, False], name='x'),
pd.Series([0, 1], name='x'),
pd.Series([1, 2, 3], name='x'),
pd.Series([True, True, None], name='x'),
pd.Series([True, False, None], name='x'),
pd.Series([], name='x'),
pd.Series([np.nan], name='x')]:
ks = koalas.from_pandas(ps)
self.assert_eq(ks.any(), ps.any())

ps = pd.Series([1, 2, 3, 4], name='x')
ks = koalas.from_pandas(ps)

self.assert_eq((ks % 2 == 0).any(), (ps % 2 == 0).any())

def test_sort_values(self):
ps = pd.Series([1, 2, 3, 4, 5, None, 7], name='0')
ks = koalas.from_pandas(ps)
Expand Down

0 comments on commit 73a2854

Please sign in to comment.