apache · itholic · Aug 26, 2022 · Aug 26, 2022 · Aug 29, 2022 · Aug 31, 2022
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
@@ -32,7 +32,7 @@ RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget
 RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
 
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
-RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib
+RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl
 
 RUN add-apt-repository ppa:pypy/ppa
 RUN apt update

diff --git a/python/pyspark/pandas/tests/test_dataframe_conversion.py b/python/pyspark/pandas/tests/test_dataframe_conversion.py
@@ -90,7 +90,6 @@ def get_excel_dfs(pandas_on_spark_location, pandas_location):
             "expected": pd.read_excel(pandas_location, index_col=0),
         }
 
-    @unittest.skip("openpyxl")
     def test_to_excel(self):
 python_execs = [x for x in ["python3.9", "pypy3"] if which(x)] 
 python_execs = [x for x in ["python3.9", "pypy3"] if which(x)] 
         with self.temp_dir() as dirpath:
             pandas_location = dirpath + "/" + "output1.xlsx"

diff --git a/python/pyspark/pandas/tests/test_dataframe_spark_io.py b/python/pyspark/pandas/tests/test_dataframe_spark_io.py
@@ -247,6 +247,7 @@ def test_spark_io(self):
                 expected_idx.sort_values(by="f").to_spark().toPandas(),
             )
 
+    # TODO(SPARK-40353): Re-enable `test_read_excel`.
     @unittest.skip("openpyxl")
     def test_read_excel(self):
         with self.temp_dir() as tmp: