diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py
index dd73c0760bbb..61cf4a6d8e34 100644
--- a/python/ray/dataframe/dataframe.py
+++ b/python/ray/dataframe/dataframe.py
@@ -776,9 +776,23 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
             DataFrame with the dropna applied.
         """
         if is_list_like(axis):
-            raise NotImplementedError(
-                "To contribute to Pandas on Ray, please visit "
-                "github.com/ray-project/ray.")
+            # Validate before the flag is consulted below; both exits of this
+            # branch return before the validation further down is reached.
+            inplace = validate_bool_kwarg(inplace, "inplace")
+            result = self
+            # TODO(kunalgosar): this builds an intermediate dataframe,
+            # which does unnecessary computation
+            for ax in axis:
+                result = result.dropna(
+                    axis=ax, how=how, thresh=thresh, subset=subset)
+            if not inplace:
+                return result
+
+            return self._update_inplace(
+                block_partitions=result._block_partitions,
+                columns=result.columns,
+                index=result.index
+            )
 
         axis = pd.DataFrame()._get_axis_number(axis)
         inplace = validate_bool_kwarg(inplace, "inplace")
diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py
index fa29762c2b06..51698e392b35 100644
--- a/python/ray/dataframe/test/test_dataframe.py
+++ b/python/ray/dataframe/test/test_dataframe.py
@@ -839,6 +839,8 @@ def test_dense_nan_df():
 
     test_dropna(ray_df, pd_df)
     test_dropna_inplace(ray_df, pd_df)
+    test_dropna_multiple_axes(ray_df, pd_df)
+    test_dropna_multiple_axes_inplace(ray_df, pd_df)
 
 
 @pytest.fixture
@@ -1297,16 +1299,17 @@ def test_drop_duplicates():
 
 @pytest.fixture
 def test_dropna(ray_df, pd_df):
-    ray_df_equals_pandas(ray_df.dropna(axis=1, how='all'),
-                         pd_df.dropna(axis=1, how='all'))
+    assert ray_df_equals_pandas(ray_df.dropna(axis=1, how='all'),
+                                pd_df.dropna(axis=1, how='all'))
 
-    ray_df_equals_pandas(ray_df.dropna(axis=1, how='any'),
-                         pd_df.dropna(axis=1, how='any'))
+    assert ray_df_equals_pandas(ray_df.dropna(axis=1, how='any'),
+                                pd_df.dropna(axis=1, how='any'))
 
-    ray_df_equals_pandas(ray_df.dropna(axis=0, how='all'),
-                         pd_df.dropna(axis=0, how='all'))
+    assert ray_df_equals_pandas(ray_df.dropna(axis=0, how='all'),
+                                pd_df.dropna(axis=0, how='all'))
 
-    ray_df_equals_pandas(ray_df.dropna(thresh=2), pd_df.dropna(thresh=2))
+    assert ray_df_equals_pandas(ray_df.dropna(thresh=2),
+                                pd_df.dropna(thresh=2))
 
 
 @pytest.fixture
@@ -1317,12 +1320,43 @@ def test_dropna_inplace(ray_df, pd_df):
     ray_df.dropna(thresh=2, inplace=True)
     pd_df.dropna(thresh=2, inplace=True)
 
-    ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pd_df)
 
     ray_df.dropna(axis=1, how='any', inplace=True)
     pd_df.dropna(axis=1, how='any', inplace=True)
 
-    ray_df_equals_pandas(ray_df, pd_df)
+    assert ray_df_equals_pandas(ray_df, pd_df)
+
+
+@pytest.fixture
+def test_dropna_multiple_axes(ray_df, pd_df):
+    assert ray_df_equals_pandas(
+        ray_df.dropna(how='all', axis=[0, 1]),
+        pd_df.dropna(how='all', axis=[0, 1])
+    )
+    assert ray_df_equals_pandas(
+        ray_df.dropna(how='all', axis=(0, 1)),
+        pd_df.dropna(how='all', axis=(0, 1))
+    )
+
+
+@pytest.fixture
+def test_dropna_multiple_axes_inplace(ray_df, pd_df):
+    ray_df_copy = ray_df.copy()
+    pd_df_copy = pd_df.copy()
+
+    ray_df_copy.dropna(how='all', axis=[0, 1], inplace=True)
+    pd_df_copy.dropna(how='all', axis=[0, 1], inplace=True)
+
+    assert ray_df_equals_pandas(ray_df_copy, pd_df_copy)
+
+    ray_df_copy = ray_df.copy()
+    pd_df_copy = pd_df.copy()
+
+    ray_df_copy.dropna(how='all', axis=(0, 1), inplace=True)
+    pd_df_copy.dropna(how='all', axis=(0, 1), inplace=True)
+
+    assert ray_df_equals_pandas(ray_df_copy, pd_df_copy)
 
 
 def test_duplicated():