Skip to content

Commit 468bf3a

Browse files
authored
Move the value_counts check for struct into MultiIndex only (#1142)
This PR addresses the comment at #1116 (comment)
1 parent 5a950c0 commit 468bf3a

File tree

3 files changed

+19
-16
lines changed

3 files changed

+19
-16
lines changed

databricks/koalas/base.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020

2121
from functools import wraps, partial
2222
from typing import Union, Callable, Any
23-
from distutils.version import LooseVersion
2423

25-
import pyspark
2624
import numpy as np
2725
import pandas as pd
2826
from pandas.api.types import is_list_like
@@ -35,7 +33,7 @@
3533
from databricks.koalas import numpy_compat
3634
from databricks.koalas.internal import _InternalFrame, SPARK_INDEX_NAME_FORMAT
3735
from databricks.koalas.typedef import pandas_wraps, spark_type_to_pandas_dtype
38-
from databricks.koalas.utils import align_diff_series, scol_for, validate_axis, default_session
36+
from databricks.koalas.utils import align_diff_series, scol_for, validate_axis
3937
from databricks.koalas.frame import DataFrame
4038

4139

@@ -948,14 +946,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, d
948946
3 1
949947
Name: koalas, dtype: int64
950948
"""
951-
from databricks.koalas.series import Series, _col
952-
from databricks.koalas.indexes import MultiIndex
953-
if LooseVersion(pyspark.__version__) < LooseVersion("2.4") and \
954-
default_session().conf.get("spark.sql.execution.arrow.enabled") == "true" and \
955-
isinstance(self, MultiIndex):
956-
raise RuntimeError("if you're using pyspark < 2.4, set conf "
957-
"'spark.sql.execution.arrow.enabled' to 'false' "
958-
"for using this function with MultiIndex")
949+
from databricks.koalas.series import _col
950+
959951
if bins is not None:
960952
raise NotImplementedError("value_counts currently does not support bins")
961953

databricks/koalas/indexes.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
"""
1818
Wrappers for Indexes to behave similar to pandas Index, MultiIndex.
1919
"""
20-
21-
from functools import partial, reduce
20+
from distutils.version import LooseVersion
21+
from functools import partial
2222
from typing import Any, List, Optional, Tuple, Union
2323

2424
import pandas as pd
@@ -27,6 +27,7 @@
2727
is_categorical_dtype, is_integer_dtype, is_float_dtype, is_numeric_dtype, is_object_dtype
2828
from pandas.io.formats.printing import pprint_thing
2929

30+
import pyspark
3031
from pyspark import sql as spark
3132
from pyspark.sql import functions as F
3233

@@ -35,10 +36,9 @@
3536
from databricks.koalas.exceptions import PandasNotImplementedError
3637
from databricks.koalas.base import IndexOpsMixin
3738
from databricks.koalas.frame import DataFrame
38-
from databricks.koalas.internal import _InternalFrame
3939
from databricks.koalas.missing.indexes import _MissingPandasLikeIndex, _MissingPandasLikeMultiIndex
4040
from databricks.koalas.series import Series
41-
from databricks.koalas.utils import name_like_string
41+
from databricks.koalas.utils import name_like_string, default_session
4242
from databricks.koalas.internal import _InternalFrame
4343

4444

@@ -1159,6 +1159,18 @@ def symmetric_difference(self, other, result_name=None, sort=None):
11591159

11601160
return result
11611161

1162+
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
1163+
if LooseVersion(pyspark.__version__) < LooseVersion("2.4") and \
1164+
default_session().conf.get("spark.sql.execution.arrow.enabled") == "true" and \
1165+
isinstance(self, MultiIndex):
1166+
raise RuntimeError("if you're using pyspark < 2.4, set conf "
1167+
"'spark.sql.execution.arrow.enabled' to 'false' "
1168+
"for using this function with MultiIndex")
1169+
return super(MultiIndex, self).value_counts(
1170+
normalize=normalize, sort=sort, ascending=ascending, bins=bins, dropna=dropna)
1171+
1172+
value_counts.__doc__ = IndexOpsMixin.value_counts.__doc__
1173+
11621174
def __getattr__(self, item: str) -> Any:
11631175
if hasattr(_MissingPandasLikeMultiIndex, item):
11641176
property_or_func = getattr(_MissingPandasLikeMultiIndex, item)

databricks/koalas/tests/test_series.py

-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from databricks.koalas.testing.utils import ReusedSQLTestCase, SQLTestUtils
3434
from databricks.koalas.exceptions import PandasNotImplementedError
3535
from databricks.koalas.missing.series import _MissingPandasLikeSeries
36-
from databricks.koalas.config import set_option, reset_option
3736
from databricks.koalas.utils import default_session
3837

3938

0 commit comments

Comments
 (0)