Skip to content

Commit fe2943e

Browse files
committed
Prints out if Pandas and PyArrow are installed or not in PySpark SQL tests
1 parent 6fb3fd1 commit fe2943e

File tree

1 file changed

+67
-5
lines changed

1 file changed

+67
-5
lines changed

python/run-tests.py

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,16 @@
3131
import Queue
3232
else:
3333
import queue as Queue
34+
from distutils.version import LooseVersion
3435

3536

3637
# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
3738
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/"))
3839

3940

4041
from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings)
41-
from sparktestsupport.shellutils import which, subprocess_check_output, run_cmd # noqa
42-
from sparktestsupport.modules import all_modules # noqa
42+
from sparktestsupport.shellutils import which, subprocess_check_output # noqa
43+
from sparktestsupport.modules import all_modules, pyspark_sql # noqa
4344

4445

4546
python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root')
@@ -151,6 +152,66 @@ def parse_opts():
151152
return opts
152153

153154

155+
def _report_optional_package(python_exec, display_name, import_name, minimum_version):
    """
    Log whether features relying on an optional package will be tested or skipped.

    The package version is obtained by running `python_exec` in a subprocess, importing
    `import_name` there and printing its `__version__`.

    :param python_exec: Python executable used to probe for the package.
    :param display_name: human readable package name used in the log messages.
    :param import_name: module name imported inside the subprocess.
    :param minimum_version: lowest version string regarded as sufficient.
    """
    skip_prefix = (
        "Will skip %s related features against Python executable "
        "'%s' in '%s' module. %s >= %s is required; however, %s " % (
            display_name, python_exec, pyspark_sql.name,
            display_name, minimum_version, display_name))

    try:
        # The context manager closes the devnull handle once the probe has finished.
        with open(os.devnull, 'w') as devnull:
            found_version = subprocess_check_output(
                [python_exec, "-c",
                 "import %s; print(%s.__version__)" % (import_name, import_name)],
                universal_newlines=True,
                stderr=devnull).strip()
        is_supported = LooseVersion(found_version) >= LooseVersion(minimum_version)
    except Exception:
        # Any failure to obtain or compare the version is reported as "not found".
        # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate instead of being misreported as a missing package.
        LOGGER.warning(skip_prefix + "was not found.")
        return

    if is_supported:
        LOGGER.info("Will test %s related features against Python executable "
                    "'%s' in '%s' module." % (display_name, python_exec, pyspark_sql.name))
    else:
        LOGGER.warning(skip_prefix + "%s was found." % found_version)


def _check_dependencies(python_exec, modules_to_test):
    """
    Check that `python_exec` has the dependencies needed to test `modules_to_test`.

    If the 'COVERAGE_PROCESS_START' environment variable is set, the `coverage` package
    must be importable by `python_exec`; otherwise an error is printed and the process
    exits with status -1.

    If the 'pyspark-sql' module is among `modules_to_test`, it logs whether PyArrow and
    Pandas related features will be tested or skipped for `python_exec`. See SPARK-23300.

    :param python_exec: Python executable to check.
    :param modules_to_test: collection of modules selected for testing.
    """
    if "COVERAGE_PROCESS_START" in os.environ:
        # Make sure that coverage is installed.
        try:
            with open(os.devnull, 'w') as devnull:
                subprocess_check_output(
                    [python_exec, "-c", "import coverage"],
                    stderr=devnull)
        except Exception:
            print_red("Coverage is not installed in Python executable '%s' "
                      "but 'COVERAGE_PROCESS_START' environment variable is set, "
                      "exiting." % python_exec)
            sys.exit(-1)

    # If we should test 'pyspark-sql', check whether PyArrow and Pandas are installed and
    # explicitly print out the result. See SPARK-23300.
    if pyspark_sql in modules_to_test:
        minimum_pyarrow_version = '0.8.0'
        minimum_pandas_version = '0.19.2'

        _report_optional_package(python_exec, "PyArrow", "pyarrow", minimum_pyarrow_version)
        _report_optional_package(python_exec, "Pandas", "pandas", minimum_pandas_version)
213+
214+
154215
def main():
155216
opts = parse_opts()
156217
if (opts.verbose):
@@ -175,9 +236,10 @@ def main():
175236

176237
task_queue = Queue.PriorityQueue()
177238
for python_exec in python_execs:
178-
if "COVERAGE_PROCESS_START" in os.environ:
179-
# Make sure if coverage is installed.
180-
run_cmd([python_exec, "-c", "import coverage"])
239+
# Check if the python executable has proper dependencies installed to run tests
240+
# for given modules properly.
241+
_check_dependencies(python_exec, modules_to_test)
242+
181243
python_implementation = subprocess_check_output(
182244
[python_exec, "-c", "import platform; print(platform.python_implementation())"],
183245
universal_newlines=True).strip()

0 commit comments

Comments
 (0)