diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 75b64b177a..e08a36981d 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -5494,10 +5494,18 @@ def __repr__(self): match = REPR_PATTERN.search(prev_footer) if match is not None: length = match.group("length") - name = str(self.dtype.name) - footer = "\nName: {name}, dtype: {dtype}\nShowing only the first {length}".format( - length=length, name=self.name, dtype=pprint_thing(name) - ) + dtype_name = str(self.dtype.name) + if self.name is None: + footer = "\ndtype: {dtype}\nShowing only the first {length}".format( + length=length, dtype=pprint_thing(dtype_name) + ) + else: + footer = ( + "\nName: {name}, dtype: {dtype}" + "\nShowing only the first {length}".format( + length=length, name=self.name, dtype=pprint_thing(dtype_name) + ) + ) return rest + footer return pser.to_string(name=self.name, dtype=self.dtype) diff --git a/databricks/koalas/tests/test_repr.py b/databricks/koalas/tests/test_repr.py index b8644e39b1..346451b6e0 100644 --- a/databricks/koalas/tests/test_repr.py +++ b/databricks/koalas/tests/test_repr.py @@ -13,7 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from distutils.version import LooseVersion + import numpy as np +import pyspark from databricks import koalas as ks from databricks.koalas.config import set_option, reset_option, option_context @@ -40,6 +43,9 @@ def test_repr_dataframe(self): kdf = ks.range(ReprTest.max_display_count + 1) self.assertTrue("Showing only the first" in repr(kdf)) + self.assertTrue( + repr(kdf).startswith(repr(kdf.to_pandas().head(ReprTest.max_display_count))) + ) with option_context("display.max_rows", None): kdf = ks.range(ReprTest.max_display_count + 1) @@ -52,24 +58,84 @@ def test_repr_series(self): kser = ks.range(ReprTest.max_display_count + 1).id self.assertTrue("Showing only the first" in repr(kser)) + self.assertTrue( + repr(kser).startswith(repr(kser.to_pandas().head(ReprTest.max_display_count))) + ) with option_context("display.max_rows", None): kser = ks.range(ReprTest.max_display_count + 1).id self.assert_eq(repr(kser), repr(kser.to_pandas())) + kser = ks.range(ReprTest.max_display_count).id.rename() + self.assertTrue("Showing only the first" not in repr(kser)) + self.assert_eq(repr(kser), repr(kser.to_pandas())) + + kser = ks.range(ReprTest.max_display_count + 1).id.rename() + self.assertTrue("Showing only the first" in repr(kser)) + self.assertTrue( + repr(kser).startswith(repr(kser.to_pandas().head(ReprTest.max_display_count))) + ) + + with option_context("display.max_rows", None): + kser = ks.range(ReprTest.max_display_count + 1).id.rename() + self.assert_eq(repr(kser), repr(kser.to_pandas())) + + if LooseVersion(pyspark.__version__) >= LooseVersion("2.4"): + kser = ks.MultiIndex.from_tuples( + [(100 * i, i) for i in range(ReprTest.max_display_count)] + ).to_series() + self.assertTrue("Showing only the first" not in repr(kser)) + self.assert_eq(repr(kser), repr(kser.to_pandas())) + + kser = ks.MultiIndex.from_tuples( + [(100 * i, i) for i in range(ReprTest.max_display_count + 1)] + ).to_series() + self.assertTrue("Showing only the first" in repr(kser)) + self.assertTrue( + repr(kser).startswith(repr(kser.to_pandas().head(ReprTest.max_display_count))) + ) + + with option_context("display.max_rows", None): + kser = ks.MultiIndex.from_tuples( + [(100 * i, i) for i in range(ReprTest.max_display_count + 1)] + ).to_series() + self.assert_eq(repr(kser), repr(kser.to_pandas())) + def test_repr_indexes(self): - kdf = ks.range(ReprTest.max_display_count) - kidx = kdf.index + kidx = ks.range(ReprTest.max_display_count).index self.assertTrue("Showing only the first" not in repr(kidx)) self.assert_eq(repr(kidx), repr(kidx.to_pandas())) - kdf = ks.range(ReprTest.max_display_count + 1) - kidx = kdf.index + kidx = ks.range(ReprTest.max_display_count + 1).index self.assertTrue("Showing only the first" in repr(kidx)) + self.assertTrue( + repr(kidx).startswith( + repr(kidx.to_pandas().to_series().head(ReprTest.max_display_count).index) + ) + ) with option_context("display.max_rows", None): - kdf = ks.range(ReprTest.max_display_count + 1) - kidx = kdf.index + kidx = ks.range(ReprTest.max_display_count + 1).index + self.assert_eq(repr(kidx), repr(kidx.to_pandas())) + + kidx = ks.MultiIndex.from_tuples([(100 * i, i) for i in range(ReprTest.max_display_count)]) + self.assertTrue("Showing only the first" not in repr(kidx)) + self.assert_eq(repr(kidx), repr(kidx.to_pandas())) + + kidx = ks.MultiIndex.from_tuples( + [(100 * i, i) for i in range(ReprTest.max_display_count + 1)] + ) + self.assertTrue("Showing only the first" in repr(kidx)) + self.assertTrue( + repr(kidx).startswith( + repr(kidx.to_pandas().to_frame().head(ReprTest.max_display_count).index) + ) + ) + + with option_context("display.max_rows", None): + kidx = ks.MultiIndex.from_tuples( + [(100 * i, i) for i in range(ReprTest.max_display_count + 1)] + ) self.assert_eq(repr(kidx), repr(kidx.to_pandas())) def test_html_repr(self): diff --git a/databricks/koalas/typedef/typehints.py b/databricks/koalas/typedef/typehints.py index 7a96c85631..8c8b794e57 100644 --- a/databricks/koalas/typedef/typehints.py +++ b/databricks/koalas/typedef/typehints.py @@ -127,7 +127,7 @@ def as_spark_type(tpe) -> types.DataType: def spark_type_to_pandas_dtype(spark_type): """ Return the given Spark DataType to pandas dtype. """ - if isinstance(spark_type, (types.DateType, types.UserDefinedType)): + if isinstance(spark_type, (types.DateType, types.StructType, types.UserDefinedType)): return np.dtype("object") elif isinstance(spark_type, types.TimestampType): return np.dtype("datetime64[ns]")