Commit 2ba07d5

[SPARK-23300][TESTS][BRANCH-2.3] Prints out if Pandas and PyArrow are installed or not in PySpark SQL tests
This PR backports #20473 to branch-2.3.

Author: hyukjinkwon <[email protected]>

Closes #20533 from HyukjinKwon/backport-20473.

1 file changed

python/run-tests.py (55 additions, 1 deletion)
@@ -31,6 +31,7 @@
     import Queue
 else:
     import queue as Queue
+from distutils.version import LooseVersion


 # Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
@@ -39,7 +40,7 @@

 from sparktestsupport import SPARK_HOME  # noqa (suppress pep8 warnings)
 from sparktestsupport.shellutils import which, subprocess_check_output  # noqa
-from sparktestsupport.modules import all_modules  # noqa
+from sparktestsupport.modules import all_modules, pyspark_sql  # noqa


 python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root')
@@ -151,6 +152,55 @@ def parse_opts():
     return opts


+def _check_dependencies(python_exec, modules_to_test):
+    # If we should test 'pyspark-sql', it checks if PyArrow and Pandas are installed and
+    # explicitly prints out. See SPARK-23300.
+    if pyspark_sql in modules_to_test:
+        # TODO(HyukjinKwon): Relocate and deduplicate these version specifications.
+        minimum_pyarrow_version = '0.8.0'
+        minimum_pandas_version = '0.19.2'
+
+        try:
+            pyarrow_version = subprocess_check_output(
+                [python_exec, "-c", "import pyarrow; print(pyarrow.__version__)"],
+                universal_newlines=True,
+                stderr=open(os.devnull, 'w')).strip()
+            if LooseVersion(pyarrow_version) >= LooseVersion(minimum_pyarrow_version):
+                LOGGER.info("Will test PyArrow related features against Python executable "
+                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
+            else:
+                LOGGER.warning(
+                    "Will skip PyArrow related features against Python executable "
+                    "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
+                    "%s was found." % (
+                        python_exec, pyspark_sql.name, minimum_pyarrow_version, pyarrow_version))
+        except:
+            LOGGER.warning(
+                "Will skip PyArrow related features against Python executable "
+                "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
+                "was not found." % (python_exec, pyspark_sql.name, minimum_pyarrow_version))
+
+        try:
+            pandas_version = subprocess_check_output(
+                [python_exec, "-c", "import pandas; print(pandas.__version__)"],
+                universal_newlines=True,
+                stderr=open(os.devnull, 'w')).strip()
+            if LooseVersion(pandas_version) >= LooseVersion(minimum_pandas_version):
+                LOGGER.info("Will test Pandas related features against Python executable "
+                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
+            else:
+                LOGGER.warning(
+                    "Will skip Pandas related features against Python executable "
+                    "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
+                    "%s was found." % (
+                        python_exec, pyspark_sql.name, minimum_pandas_version, pandas_version))
+        except:
+            LOGGER.warning(
+                "Will skip Pandas related features against Python executable "
+                "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
+                "was not found." % (python_exec, pyspark_sql.name, minimum_pandas_version))
+
+
 def main():
     opts = parse_opts()
     if (opts.verbose):
@@ -175,6 +225,10 @@ def main():

     task_queue = Queue.PriorityQueue()
     for python_exec in python_execs:
+        # Check if the python executable has proper dependencies installed to run tests
+        # for given modules properly.
+        _check_dependencies(python_exec, modules_to_test)
+
         python_implementation = subprocess_check_output(
             [python_exec, "-c", "import platform; print(platform.python_implementation())"],
             universal_newlines=True).strip()
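As context for the change above (not part of the commit): the check works by launching the target Python executable in a subprocess, printing the package's __version__, and comparing it against a minimum version with distutils.version.LooseVersion. The following is a minimal standalone sketch of that pattern; the probe_version helper and the executable/package names used in the usage comment are illustrative assumptions, not code from the Spark repository.

# Minimal sketch (assumption, not part of the commit): probe an interpreter
# for a package version the same way run-tests.py does, then compare it
# against a minimum using LooseVersion.
import os
import subprocess
from distutils.version import LooseVersion


def probe_version(python_exec, package, minimum):
    # Hypothetical helper: returns True if `package` is importable under
    # `python_exec` and its __version__ is at least `minimum`.
    try:
        with open(os.devnull, 'w') as devnull:
            version = subprocess.check_output(
                [python_exec, "-c",
                 "import %s; print(%s.__version__)" % (package, package)],
                universal_newlines=True, stderr=devnull).strip()
    except Exception:
        # Package is missing or the interpreter could not be run.
        return False
    return LooseVersion(version) >= LooseVersion(minimum)


# Example usage with the thresholds this commit checks for:
# probe_version("python3", "pyarrow", "0.8.0")
# probe_version("python3", "pandas", "0.19.2")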
