Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion dev/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def determine_modules_to_test(changed_modules):
['graphx', 'examples']
>>> x = [x.name for x in determine_modules_to_test([modules.sql])]
>>> x # doctest: +NORMALIZE_WHITESPACE
... # doctest: +SKIP
['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I manually tested this with both `HADOOP_PROFILE=hadoop3.2 python -m doctest run-tests.py` and `python -m doctest run-tests.py`.

'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
"""
Expand All @@ -122,9 +123,15 @@ def determine_modules_to_test(changed_modules):
# If we need to run all of the tests, then we should short-circuit and return 'root'
if modules.root in modules_to_test:
return [modules.root]
return toposort_flatten(
changed_modules = toposort_flatten(
{m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test}, sort=True)

# TODO: Skip hive-thriftserver module for hadoop-3.2. remove this once hadoop-3.2 support it
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just keeping this change isn't enough because build and some other checks use profiles from root's build_profile_flags.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Thank you @HyukjinKwon

Copy link
Member Author

@HyukjinKwon HyukjinKwon May 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, this fix is itself a temporary fix, intended to keep the affected scope isolated and minimised. To fix it properly, we should probably handle it within Module in modules.py (and conditionally remove this entire module).

if modules.hadoop_version == "hadoop3.2":
changed_modules = [m for m in changed_modules if m.name != "hive-thriftserver"]

return changed_modules


def determine_tags_to_exclude(changed_modules):
tags = []
Expand Down
21 changes: 10 additions & 11 deletions dev/sparktestsupport/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@
import re
import os

if os.environ.get("AMPLAB_JENKINS"):
hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
else:
hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
print("[info] Choosing supported modules with Hadoop profile", hadoop_version)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a partial revert of SPARK-27402, ping @wangyum .

all_modules = []


Expand Down Expand Up @@ -80,11 +74,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
self.dependent_modules = set()
for dep in dependencies:
dep.dependent_modules.add(self)
# TODO: Skip hive-thriftserver module for hadoop-3.2. remove this once hadoop-3.2 support it
if name == "hive-thriftserver" and hadoop_version == "hadoop3.2":
print("[info] Skip unsupported module:", name)
else:
all_modules.append(self)
all_modules.append(self)

def contains_file(self, filename):
return any(re.match(p, filename) for p in self.source_file_prefixes)
Expand Down Expand Up @@ -568,6 +558,15 @@ def __hash__(self):
]
)

# TODO: Skip hive-thriftserver module for hadoop-3.2. remove this once hadoop-3.2 support it
if os.environ.get("AMPLAB_JENKINS"):
hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
else:
hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
if hadoop_version == "hadoop3.2":
print("[info] Skip unsupported module:", "hive-thriftserver")
all_modules = [m for m in all_modules if m.name != "hive-thriftserver"]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, I tested all_modules. This change just moves the code that will be removed later into a single place.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This just does the same thing as before, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I just moved it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. Thank you.


# The root module is a dummy module which is used to run all of the tests.
# No other modules should directly depend on this module.
root = Module(
Expand Down