3131 import Queue
3232else :
3333 import queue as Queue
34+ from distutils .version import LooseVersion
3435
3536
3637# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
3738sys .path .append (os .path .join (os .path .dirname (os .path .realpath (__file__ )), "../dev/" ))
3839
3940
4041from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings)
41- from sparktestsupport .shellutils import which , subprocess_check_output , run_cmd # noqa
42- from sparktestsupport .modules import all_modules # noqa
42+ from sparktestsupport .shellutils import which , subprocess_check_output # noqa
43+ from sparktestsupport .modules import all_modules , pyspark_sql # noqa
4344
4445
# Index every module that declares Python test goals by its name, leaving out the
# aggregate 'root' module.
python_modules = {
    module.name: module
    for module in all_modules
    if module.python_test_goals and module.name != 'root'
}
@@ -151,6 +152,66 @@ def parse_opts():
151152 return opts
152153
153154
def _check_dependencies(python_exec, modules_to_test):
    """Check that ``python_exec`` has the dependencies needed by the requested tests.

    Two independent checks are performed:

    * If the ``COVERAGE_PROCESS_START`` environment variable is set, the interpreter
      must be able to ``import coverage``. When it cannot, an error is printed and the
      process exits with status -1.
    * If ``pyspark_sql`` is among ``modules_to_test``, the installed PyArrow and Pandas
      versions are probed and the outcome (test / skip, and why) is logged. This part
      is informational only and never aborts the run. See SPARK-23300.

    :param python_exec: Python executable (name or path) to probe.
    :param modules_to_test: collection of modules selected for testing.
    """
    if "COVERAGE_PROCESS_START" in os.environ:
        # Coverage was requested through the environment, so make sure it is installed.
        try:
            # The probe's stderr is discarded; the handle is closed as soon as the
            # child process has finished.
            with open(os.devnull, 'w') as devnull:
                subprocess_check_output(
                    [python_exec, "-c", "import coverage"],
                    stderr=devnull)
        except Exception:
            # `Exception` rather than a bare `except` so that Ctrl-C (KeyboardInterrupt)
            # is not misreported as a missing coverage installation.
            print_red("Coverage is not installed in Python executable '%s' "
                      "but 'COVERAGE_PROCESS_START' environment variable is set, "
                      "exiting." % python_exec)
            sys.exit(-1)

    # If we should test 'pyspark-sql', check whether PyArrow and Pandas are installed
    # and explicitly print out the result. See SPARK-23300.
    if pyspark_sql in modules_to_test:
        _report_optional_dependency(python_exec, "pyarrow", "PyArrow", '0.8.0')
        _report_optional_dependency(python_exec, "pandas", "Pandas", '0.19.2')


def _report_optional_dependency(python_exec, package, display_name, minimum_version):
    """Log whether ``package`` is usable by ``python_exec`` for the pyspark-sql tests.

    The package version is obtained by running ``python_exec`` in a subprocess and
    compared against ``minimum_version`` with ``LooseVersion``.

    :param python_exec: Python executable (name or path) to probe.
    :param package: importable package name, e.g. ``"pyarrow"``.
    :param display_name: human readable package name used in the log messages.
    :param minimum_version: lowest version for which the related tests are run.
    """
    try:
        with open(os.devnull, 'w') as devnull:
            found_version = subprocess_check_output(
                [python_exec, "-c",
                 "import %s; print(%s.__version__)" % (package, package)],
                universal_newlines=True,
                stderr=devnull).strip()
        is_supported = LooseVersion(found_version) >= LooseVersion(minimum_version)
    except Exception:
        # Best effort: any failure to obtain or compare the version (including a
        # version string that cannot be compared) is reported as "not found".
        LOGGER.warning(
            "Will skip %s related features against Python executable "
            "'%s' in '%s' module. %s >= %s is required; however, %s "
            "was not found." % (
                display_name, python_exec, pyspark_sql.name,
                display_name, minimum_version, display_name))
        return

    if is_supported:
        LOGGER.info(
            "Will test %s related features against Python executable "
            "'%s' in '%s' module." % (display_name, python_exec, pyspark_sql.name))
    else:
        LOGGER.warning(
            "Will skip %s related features against Python executable "
            "'%s' in '%s' module. %s >= %s is required; however, %s "
            "%s was found." % (
                display_name, python_exec, pyspark_sql.name,
                display_name, minimum_version, display_name, found_version))
213+
214+
154215def main ():
155216 opts = parse_opts ()
156217 if (opts .verbose ):
@@ -175,9 +236,10 @@ def main():
175236
176237 task_queue = Queue .PriorityQueue ()
177238 for python_exec in python_execs :
178- if "COVERAGE_PROCESS_START" in os .environ :
179- # Make sure if coverage is installed.
180- run_cmd ([python_exec , "-c" , "import coverage" ])
239+ # Check if the python executable has proper dependencies installed to run tests
240+ # for given modules properly.
241+ _check_dependencies (python_exec , modules_to_test )
242+
181243 python_implementation = subprocess_check_output (
182244 [python_exec , "-c" , "import platform; print(platform.python_implementation())" ],
183245 universal_newlines = True ).strip ()
0 commit comments