Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 66 additions & 28 deletions .github/bin/build-pt-matrix-from-impacted-connectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def main():
"--impacted-features",
type=argparse.FileType("r"),
dest="impacted_features",
default="impacted-features.log",
help="List of impacted features, one per line",
)
parser.add_argument(
Expand Down Expand Up @@ -105,24 +104,24 @@ def load_available_features_for_config(config, suites, ptl_binary_path):
)
logging.debug("ptl suite describe: %s", process)
if process.returncode != 0:
logging.error("ptl suite describe failed: %s", process)
return {}
raise RuntimeError(f"ptl suite describe failed: {process}")
for line in process.stdout.splitlines():
if line.startswith("{"):
logging.debug("Parsing JSON: %s", line)
ptl_output = json.loads(line)
logging.debug("Handling JSON object: %s", ptl_output)
config_features = {}
for suite in ptl_output.get("suites", []):
key = (config, suite.get("name"))
value = set()
for testRun in suite.get("testRuns", []):
for connector in testRun["environment"].get("features", []):
value.add(connector)
config_features[key] = value
if not line.startswith("{"):
continue
logging.debug("Parsing JSON: %s", line)
ptl_output = json.loads(line)
logging.debug("Handling JSON object: %s", ptl_output)
config_features = {}
for suite in ptl_output.get("suites", []):
key = (config, suite.get("name"))
value = set()
for testRun in suite.get("testRuns", []):
for connector in testRun["environment"].get("features", []):
value.add(connector)
config_features[key] = value

logging.debug("config_features: %s", config_features)
return config_features
logging.debug("config_features: %s", config_features)
return config_features
logging.error("ptl suite describe hasn't returned any JSON line: %s", process)
return {}

Expand All @@ -142,10 +141,16 @@ def tested_features(available_connectors, config, suite):

def build(matrix_file, impacted_file, output_file, ptl_binary_path):
matrix = yaml.load(matrix_file, Loader=yaml.Loader)
logging.info("Read matrix: %s", matrix)
if impacted_file is None:
logging.info("No impacted_features, not applying any changes to matrix")
json.dump(matrix, output_file)
output_file.write("\n")
return

impacted_features = list(
filter(None, [line.rstrip() for line in impacted_file.readlines()])
)
logging.info("Read matrix: %s", matrix)
logging.info("Read impacted_features: %s", impacted_features)
result = copy.copy(matrix)
items = expand_matrix(matrix)
Expand All @@ -156,27 +161,56 @@ def build(matrix_file, impacted_file, output_file, ptl_binary_path):
configToSuiteMap[item.get("config")].append(item.get("suite"))
available_features = load_available_features(configToSuiteMap, ptl_binary_path)
if len(available_features) > 0:
all_excluded = True
for item in items:
features = tested_features(
available_features, item.get("config"), item.get("suite")
)
logging.debug(
"impacted_features: %s, features: %s", impacted_features, features
)
logging.debug("matrix item features: %s", features)
if not any(connector in impacted_features for connector in features):
logging.info("Excluding matrix entry due to features: %s", item)
result.setdefault("exclude", []).append(item)
if "include" in result and item in result["include"]:
logging.debug("Removing from include list: %s", item)
result["include"].remove(item)
else:
all_excluded = False
if all_excluded:
# if every single item in the matrix is excluded, write an empty matrix
output_file.write("{}\n")
return
json.dump(result, output_file)
output_file.write("\n")


class TestBuild(unittest.TestCase):
def test_build(self):
self.maxDiff = None
# cases are tuples of: matrix, impacted, ptl_binary_path, expected
cases = [
# impacted features empty, all items are excluded and an empty matrix is returned
(
{
"config": ["A", "B", "C"],
"suite": ["1", "2", "3"],
},
[],
".github/bin/fake-ptl",
{},
),
# no impacted features, ptl is not called, no changes to matrix
(
{
"config": ["A", "B", "C"],
"suite": ["1", "2", "3"],
},
None,
"invalid",
{
"config": ["A", "B", "C"],
"suite": ["1", "2", "3"],
},
),
# missing features get added to exclude list
(
{
Expand Down Expand Up @@ -266,12 +300,11 @@ def test_build(self):
(
# input matrix
{
"config": ["default", "hdp3", "cdh5"],
"config": ["default", "hdp3"],
"suite": ["suite-1", "suite-2", "suite-3", "suite-5"],
"jdk": ["11"],
"ignore exclusion if": [False],
"exclude": [
{"config": "cdh5", "ignore exclusion if": True},
{"config": "default", "ignore exclusion if": False},
],
"include": [
Expand Down Expand Up @@ -313,9 +346,8 @@ def test_build(self):
"testing/bin/ptl",
# expected matrix
{
"config": ["default", "hdp3", "cdh5"],
"config": ["default", "hdp3"],
"exclude": [
{"config": "cdh5", "ignore exclusion if": True},
{"config": "default", "ignore exclusion if": False},
{"config": "default", "jdk": "11", "suite": "suite-oauth2"},
],
Expand Down Expand Up @@ -368,10 +400,16 @@ def test_build(self):
# given
yaml.dump(matrix, matrix_file)
matrix_file.seek(0)
impacted_file.write("\n".join(impacted))
impacted_file.seek(0)
if impacted is not None:
impacted_file.write("\n".join(impacted))
impacted_file.seek(0)
impacted_file_final = impacted_file
else:
impacted_file_final = None
# when
build(matrix_file, impacted_file, output_file, ptl_binary_path)
build(
matrix_file, impacted_file_final, output_file, ptl_binary_path
)
output_file.seek(0)
output = json.load(output_file)
# then
Expand Down
19 changes: 18 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ jobs:
# actual value of the property, and will therefore not be excluded.
# - If the expression evaluates to false, it will match the actual
# value of the property, and the exclusion will apply normally.
- false
- "false"
include:
# this suite is not meant to be run with different configs
- config: default
Expand Down Expand Up @@ -771,6 +771,22 @@ jobs:
./.github/bin/build-pt-matrix-from-impacted-connectors.py -v -m .github/test-pt-matrix.yaml -o matrix.json
- name: Build PT matrix (impacted-features)
if: steps.filter.outputs.product-tests == 'false' && !contains(github.event.pull_request.labels.*.name, 'tests:all') && !contains(github.event.pull_request.labels.*.name, 'product-tests:all')
# all these envs are required to be set by some product test environments
env:
ABFS_CONTAINER:
Copy link
Copy Markdown
Member Author

@nineinchnick nineinchnick Oct 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@findinpath @alexjo2144 just FYI, it's a bit weird we have to provide so many variables just to describe the environment:

DATABRICKS_73_JDBC_URL= \
DATABRICKS_91_JDBC_URL= \
DATABRICKS_104_JDBC_URL= \
DATABRICKS_LOGIN= DATABRICKS_TOKEN= \
AWS_REGION= \
S3_BUCKET= \
DATABRICKS_AWS_ACCESS_KEY_ID= \
DATABRICKS_AWS_SECRET_ACCESS_KEY= \
testing/bin/ptl suite describe --suite suite-delta-lake-databricks --config config-default

but I do realize using requireNonNull allows for providing an actionable error message. I'm not sure how to delay getting the actual values to when the env is started, not just created, so we can keep this as is.

Copy link
Copy Markdown
Member

@electrum electrum Oct 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If someone adds a new environment variable and forgets to add it here, what happens? Should we fail the build in that case?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I made sure failures like that are not silently ignored.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DATABRICKS_XX_JDBC_URL corresponds to the Databricks cluster. There are 3 clusters we check for compatibility against. The suite contains correspondingly 3 environments against which we test Delta Lake.

AWS related settings are needed for testing against AWS S3 & Glue (the metastore for the Delta Lake tests).

If someone adds a new environment variable and forgets to add it here, what happens ?

If the env variable is not related to the product tests it will simply be ignored.

ABFS_ACCOUNT:
ABFS_ACCESS_KEY:
S3_BUCKET:
AWS_REGION:
DATABRICKS_AWS_ACCESS_KEY_ID:
DATABRICKS_AWS_SECRET_ACCESS_KEY:
DATABRICKS_73_JDBC_URL:
DATABRICKS_91_JDBC_URL:
DATABRICKS_104_JDBC_URL:
DATABRICKS_LOGIN:
DATABRICKS_TOKEN:
GCP_CREDENTIALS_KEY:
GCP_STORAGE_BUCKET:
run: |
# converts filtered YAML file into JSON
./.github/bin/build-pt-matrix-from-impacted-connectors.py -v -m .github/test-pt-matrix.yaml -i impacted-features.log -o matrix.json
Expand All @@ -787,6 +803,7 @@ jobs:
runs-on: ubuntu-latest
# explicitly define the name to avoid adding the value of the `ignore exclusion if` matrix item
name: pt (${{ matrix.config }}, ${{ matrix.suite }}, ${{ matrix.jdk }})
if: needs.build-pt.outputs.matrix != '{}'
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.build-pt.outputs.matrix) }}
Expand Down