Skip to content

Commit b81afcc

Browse files
authored
Implement (DataFrame|Series).plot.pie in plotly (#1971)
This PR implements `DataFrame.plot.pie` in plotly as below:

```python
from databricks import koalas as ks

kdf = ks.DataFrame(
    {'a': [1, 2, 3, 4, 5, 6], 'b': [100, 200, 300, 400, 500, 600]},
    index=[10, 20, 30, 40, 50, 60])
ks.options.plotting.backend = 'plotly'
kdf.plot.pie(y="b")
```

![Screen Shot 2020-12-17 at 3 28 12 PM](https://user-images.githubusercontent.com/6477701/102451779-87005380-407c-11eb-85f3-aa2d8e62c991.png)

Binder to test: https://mybinder.org/v2/gh/HyukjinKwon/koalas/plotly-pie?filepath=docs%2Fsource%2Fgetting_started%2F10min.ipynb
1 parent 37f7e50 commit b81afcc

File tree

4 files changed

+129
-5
lines changed

4 files changed

+129
-5
lines changed

databricks/koalas/plot/core.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,12 @@ def _get_plot_backend(backend=None):
173173
return KoalasPlotAccessor._backends[backend]
174174

175175
module = KoalasPlotAccessor._find_backend(backend)
176+
177+
if backend == "plotly":
178+
from databricks.koalas.plot.plotly import plot_plotly
179+
180+
module.plot = plot_plotly(module.plot)
181+
176182
KoalasPlotAccessor._backends[backend] = module
177183
return module
178184

@@ -714,7 +720,7 @@ def area(self, x=None, y=None, **kwds):
714720
elif isinstance(self.data, DataFrame):
715721
return self(kind="area", x=x, y=y, **kwds)
716722

717-
def pie(self, y=None, **kwds):
723+
def pie(self, **kwds):
718724
"""
719725
Generate a pie plot.
720726
@@ -728,7 +734,7 @@ def pie(self, y=None, **kwds):
728734
----------
729735
y : int or label, optional
730736
Label or position of the column to plot.
731-
If not provided, ``subplots=True`` argument must be passed.
737+
If not provided, ``subplots=True`` argument must be passed (matplotlib-only).
732738
**kwds
733739
Keyword arguments to pass on to :meth:`Koalas.Series.plot`.
734740
@@ -764,9 +770,15 @@ def pie(self, y=None, **kwds):
764770
return self(kind="pie", **kwds)
765771
else:
766772
# pandas will raise an error if y is None and subplots if not True
767-
if isinstance(self.data, DataFrame) and y is None and not kwds.get("subplots", False):
768-
raise ValueError("pie requires either y column or 'subplots=True'")
769-
return self(kind="pie", y=y, **kwds)
773+
if (
774+
isinstance(self.data, DataFrame)
775+
and kwds.get("y", None) is None
776+
and not kwds.get("subplots", False)
777+
):
778+
raise ValueError(
779+
"pie requires either y column or 'subplots=True' (matplotlib-only)"
780+
)
781+
return self(kind="pie", **kwds)
770782

771783
def scatter(self, x, y, **kwds):
772784
"""

databricks/koalas/plot/plotly.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#
2+
# Copyright (C) 2019 Databricks, Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
import pandas as pd
17+
18+
19+
def plot_plotly(origin_plot):
    """
    Wrap a plotting backend's ``plot`` function with Koalas-specific dispatch.

    Parameters
    ----------
    origin_plot : callable
        The original ``plot(data, kind, **kwargs)`` function being wrapped.

    Returns
    -------
    callable
        A function with the same calling convention. ``kind="pie"`` is routed
        to :func:`plot_pie`; every other kind is forwarded to ``origin_plot``
        unchanged.
    """
    from functools import wraps

    # Keep the wrapped function's name/docstring so that replacing the
    # backend's ``plot`` attribute does not hide its metadata.
    @wraps(origin_plot)
    def plot(data, kind, **kwargs):
        # Koalas specific plots
        if kind == "pie":
            return plot_pie(data, **kwargs)

        # Other plots.
        return origin_plot(data, kind, **kwargs)

    return plot
29+
30+
31+
def plot_pie(data, **kwargs):
    """
    Draw a pie chart with plotly express from a pandas Series or DataFrame.

    Parameters
    ----------
    data : pd.Series or pd.DataFrame
        The data to plot.

        * Series: converted to a one-column frame; by default that column
          provides the values and the index provides the names.
        * DataFrame: ``y`` (if given) selects the values column, in which
          case the index is used as the default names.
    **kwargs
        Forwarded to ``plotly.express.pie``. Explicit ``values`` / ``names``
        entries take precedence over the defaults described above.

    Returns
    -------
    The object returned by ``plotly.express.pie``.

    Raises
    ------
    RuntimeError
        If ``data`` is neither a pandas Series nor a pandas DataFrame.
    """
    # Reject unsupported inputs up front, before the lazy plotly import, so the
    # error reflects the bad argument and not the state of the plotly install.
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise RuntimeError("Unexpected type: [%s]" % type(data))

    from plotly import express

    if isinstance(data, pd.Series):
        pdf = data.to_frame()
        # Pop the overrides before forwarding **kwargs; otherwise a
        # caller-supplied ``values``/``names`` would collide with the explicit
        # keywords below and raise TypeError.
        values = kwargs.pop("values", pdf.columns[0])
        names = kwargs.pop("names", pdf.index)
        return express.pie(pdf, values=values, names=names, **kwargs)

    # DataFrame
    y = kwargs.pop("y", None)
    # The index is used as the default names only when a ``y`` column was chosen.
    default_names = data.index if y is not None else None
    values = kwargs.pop("values", y)
    names = kwargs.pop("names", default_names)
    return express.pie(data, values=values, names=names, **kwargs)

databricks/koalas/tests/plot/test_frame_plot_plotly.py

+31
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import pandas as pd
2020
import numpy as np
21+
from plotly import express
2122

2223
from databricks import koalas as ks
2324
from databricks.koalas.config import set_option, reset_option
@@ -143,3 +144,33 @@ def check_scatter_plot(pdf, kdf, x, y, c):
143144
pdf1 = pd.DataFrame(np.random.rand(50, 4), columns=["a", "b", "c", "d"])
144145
kdf1 = ks.from_pandas(pdf1)
145146
check_scatter_plot(pdf1, kdf1, x="a", y="b", c="c")
147+
148+
def test_pie_plot(self):
    """Compare Koalas DataFrame pie plots with figures built via plotly express."""

    def check_pie_plot(kdf):
        pdf = kdf.to_pandas()

        # Selecting the column through ``y``: the expected figure uses the
        # index as the names.
        actual = kdf.plot(kind="pie", y=kdf.columns[0])
        expected = express.pie(pdf, values="a", names=pdf.index)
        self.assertEqual(actual, expected)

        # Selecting the column through ``values``: the expected figure is
        # built without names.
        actual = kdf.plot(kind="pie", values="a")
        expected = express.pie(pdf, values="a")
        self.assertEqual(actual, expected)

    kdf1 = self.kdf1
    check_pie_plot(kdf1)

    # TODO: support multi-index columns
    #   columns = pd.MultiIndex.from_tuples([("x", "y"), ("y", "z")])
    #   kdf1.columns = columns
    #   check_pie_plot(kdf1)

    # TODO: support multi-index
    #   kdf1 = ks.DataFrame(
    #       {
    #           "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 50],
    #           "b": [2, 3, 4, 5, 7, 9, 10, 15, 34, 45, 49]
    #       },
    #       index=pd.MultiIndex.from_tuples([("x", "y")] * 11),
    #   )
    #   check_pie_plot(kdf1)

databricks/koalas/tests/plot/test_series_plot_plotly.py

+30
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from distutils.version import LooseVersion
1818

1919
import pandas as pd
20+
from plotly import express
2021

2122
from databricks import koalas as ks
2223
from databricks.koalas.config import set_option, reset_option
@@ -98,3 +99,32 @@ def test_area_plot(self):
9899

99100
# just a sanity check for df.col type
100101
self.assertEqual(pdf.sales.plot(kind="area"), kdf.sales.plot(kind="area"))
102+
103+
def test_pie_plot(self):
    """Compare a Koalas Series pie plot with the figure built via plotly express."""
    kdf = self.kdf1
    pdf = kdf.to_pandas()

    # The expected figure takes its values from the first column and its
    # names from the index.
    actual = kdf["a"].plot(kind="pie")
    expected = express.pie(pdf, values=pdf.columns[0], names=pdf.index)
    self.assertEqual(actual, expected)

    # TODO: support multi-index columns
    #   columns = pd.MultiIndex.from_tuples([("x", "y")])
    #   kdf.columns = columns
    #   pdf.columns = columns
    #   self.assertEqual(
    #       kdf[("x", "y")].plot(kind="pie"),
    #       express.pie(pdf, values=pdf.iloc[:, 0].to_numpy(), names=pdf.index.to_numpy()),
    #   )

    # TODO: support multi-index
    #   kdf = ks.DataFrame(
    #       {
    #           "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 50],
    #           "b": [2, 3, 4, 5, 7, 9, 10, 15, 34, 45, 49]
    #       },
    #       index=pd.MultiIndex.from_tuples([("x", "y")] * 11),
    #   )
    #   pdf = kdf.to_pandas()
    #   self.assertEqual(
    #       kdf["a"].plot(kind="pie"), express.pie(pdf, values=pdf.columns[0], names=pdf.index),
    #   )

0 commit comments

Comments
 (0)