Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix Series.__repr__ when Series.name is None. #1796

Merged
2 commits merged on Sep 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5494,10 +5494,18 @@ def __repr__(self):
match = REPR_PATTERN.search(prev_footer)
if match is not None:
length = match.group("length")
name = str(self.dtype.name)
footer = "\nName: {name}, dtype: {dtype}\nShowing only the first {length}".format(
length=length, name=self.name, dtype=pprint_thing(name)
)
dtype_name = str(self.dtype.name)
if self.name is None:
footer = "\ndtype: {dtype}\nShowing only the first {length}".format(
length=length, dtype=pprint_thing(dtype_name)
)
else:
footer = (
"\nName: {name}, dtype: {dtype}"
"\nShowing only the first {length}".format(
length=length, name=self.name, dtype=pprint_thing(dtype_name)
)
)
return rest + footer
return pser.to_string(name=self.name, dtype=self.dtype)

Expand Down
78 changes: 72 additions & 6 deletions databricks/koalas/tests/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion

import numpy as np
import pyspark

from databricks import koalas as ks
from databricks.koalas.config import set_option, reset_option, option_context
Expand All @@ -40,6 +43,9 @@ def test_repr_dataframe(self):

kdf = ks.range(ReprTest.max_display_count + 1)
self.assertTrue("Showing only the first" in repr(kdf))
self.assertTrue(
repr(kdf).startswith(repr(kdf.to_pandas().head(ReprTest.max_display_count)))
)

with option_context("display.max_rows", None):
kdf = ks.range(ReprTest.max_display_count + 1)
Expand All @@ -52,24 +58,84 @@ def test_repr_series(self):

kser = ks.range(ReprTest.max_display_count + 1).id
self.assertTrue("Showing only the first" in repr(kser))
self.assertTrue(
repr(kser).startswith(repr(kser.to_pandas().head(ReprTest.max_display_count)))
)

with option_context("display.max_rows", None):
kser = ks.range(ReprTest.max_display_count + 1).id
self.assert_eq(repr(kser), repr(kser.to_pandas()))

kser = ks.range(ReprTest.max_display_count).id.rename()
self.assertTrue("Showing only the first" not in repr(kser))
self.assert_eq(repr(kser), repr(kser.to_pandas()))

kser = ks.range(ReprTest.max_display_count + 1).id.rename()
self.assertTrue("Showing only the first" in repr(kser))
self.assertTrue(
repr(kser).startswith(repr(kser.to_pandas().head(ReprTest.max_display_count)))
)

with option_context("display.max_rows", None):
kser = ks.range(ReprTest.max_display_count + 1).id.rename()
self.assert_eq(repr(kser), repr(kser.to_pandas()))

if LooseVersion(pyspark.__version__) >= LooseVersion("2.4"):
kser = ks.MultiIndex.from_tuples(
[(100 * i, i) for i in range(ReprTest.max_display_count)]
).to_series()
self.assertTrue("Showing only the first" not in repr(kser))
self.assert_eq(repr(kser), repr(kser.to_pandas()))

kser = ks.MultiIndex.from_tuples(
[(100 * i, i) for i in range(ReprTest.max_display_count + 1)]
).to_series()
self.assertTrue("Showing only the first" in repr(kser))
self.assertTrue(
repr(kser).startswith(repr(kser.to_pandas().head(ReprTest.max_display_count)))
)

with option_context("display.max_rows", None):
kser = ks.MultiIndex.from_tuples(
[(100 * i, i) for i in range(ReprTest.max_display_count + 1)]
).to_series()
self.assert_eq(repr(kser), repr(kser.to_pandas()))

def test_repr_indexes(self):
kdf = ks.range(ReprTest.max_display_count)
kidx = kdf.index
kidx = ks.range(ReprTest.max_display_count).index
self.assertTrue("Showing only the first" not in repr(kidx))
self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

kdf = ks.range(ReprTest.max_display_count + 1)
kidx = kdf.index
kidx = ks.range(ReprTest.max_display_count + 1).index
self.assertTrue("Showing only the first" in repr(kidx))
self.assertTrue(
repr(kidx).startswith(
repr(kidx.to_pandas().to_series().head(ReprTest.max_display_count).index)
)
)

with option_context("display.max_rows", None):
kdf = ks.range(ReprTest.max_display_count + 1)
kidx = kdf.index
kidx = ks.range(ReprTest.max_display_count + 1).index
self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

kidx = ks.MultiIndex.from_tuples([(100 * i, i) for i in range(ReprTest.max_display_count)])
self.assertTrue("Showing only the first" not in repr(kidx))
self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

kidx = ks.MultiIndex.from_tuples(
[(100 * i, i) for i in range(ReprTest.max_display_count + 1)]
)
self.assertTrue("Showing only the first" in repr(kidx))
self.assertTrue(
repr(kidx).startswith(
repr(kidx.to_pandas().to_frame().head(ReprTest.max_display_count).index)
)
)

with option_context("display.max_rows", None):
kidx = ks.MultiIndex.from_tuples(
[(100 * i, i) for i in range(ReprTest.max_display_count + 1)]
)
self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

def test_html_repr(self):
Expand Down
2 changes: 1 addition & 1 deletion databricks/koalas/typedef/typehints.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def as_spark_type(tpe) -> types.DataType:

def spark_type_to_pandas_dtype(spark_type):
""" Return the given Spark DataType to pandas dtype. """
if isinstance(spark_type, (types.DateType, types.UserDefinedType)):
if isinstance(spark_type, (types.DateType, types.StructType, types.UserDefinedType)):
return np.dtype("object")
elif isinstance(spark_type, types.TimestampType):
return np.dtype("datetime64[ns]")
Expand Down