From b2d06578a9a289f38b9e2c9c343c797b817b147c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20Rives=20Gamb=C3=ADn?= Date: Wed, 25 Oct 2023 13:19:54 +0000 Subject: [PATCH] Merged in MP18-tests (pull request #11) MP18 tests for local setup * delete kubeflow manifest test so they don't get triggered when running pytest * fix kubeflow manifest * add pytests for the local setup * remove unused --- config.env | 3 + .../pipelines-profile-controller/test_sync.py | 286 ------------- .../pipelines-profile-controller/test_sync.py | 286 ------------- .../pipelines-profile-controller/test_sync.py | 286 ------------- .../contrib/kserve/tests/test_sklearn.py | 58 --- .../in-cluster-setup/kustomization.yaml | 2 +- .../proposals/20200913-rootlessKubeflow.md | 50 --- .../20220926-contrib-component-guidelines.md | 65 --- .../proposals/20230323-end-to-end-testing.md | 395 ------------------ .../kubeflow/manifests/proposals/README.md | 16 - deployment/kubeflow/manifests/tests/README.md | 27 -- .../kubeflow/manifests/tests/e2e/.gitignore | 1 - .../kubeflow/manifests/tests/e2e/README.md | 27 -- .../tests/e2e/hack/cleanup_proxies.sh | 10 - .../manifests/tests/e2e/hack/cleanup_yamls.sh | 16 - .../manifests/tests/e2e/hack/proxy_istio.sh | 18 - .../tests/e2e/hack/proxy_pipelines.sh | 10 - .../kubeflow/manifests/tests/e2e/mnist.py | 83 ---- .../manifests/tests/e2e/requirements.txt | 2 - .../kubeflow/manifests/tests/e2e/runner.sh | 17 - .../kubeflow/manifests/tests/e2e/settings.py | 9 - .../manifests/tests/e2e/utils/isvc.py | 20 - .../manifests/tests/e2e/utils/katib.py | 199 --------- .../manifests/tests/e2e/utils/kserve.py | 28 -- .../manifests/tests/e2e/utils/tfjob.py | 127 ------ .../manifests/tests/e2e/utils/watch.py | 76 ---- .../manifests/tests/e2e/yamls/role.yaml | 78 ---- .../tests/e2e/yamls/sa-role-binding.yaml | 13 - .../tests/e2e/yamls/service-account.yaml | 5 - .../tests/e2e/yamls/user-role-binding.yaml | 13 - .../tests/gh-actions/install_argo_cli.sh | 14 - .../tests/gh-actions/install_cert_manager.sh | 9 - .../tests/gh-actions/install_istio-cni.sh | 7 - .../tests/gh-actions/install_istio.sh | 10 - .../tests/gh-actions/install_kind.sh | 10 - .../tests/gh-actions/install_knative-cni.sh | 13 - .../tests/gh-actions/install_knative.sh | 13 - .../tests/gh-actions/install_kserve.sh | 12 - .../tests/gh-actions/install_kustomize.sh | 6 - .../tests/gh-actions/install_pipelines.sh | 11 - .../gh-actions/kf-objects/katib_test.yaml | 78 ---- .../gh-actions/kf-objects/kserve_test.yaml | 15 - .../gh-actions/kf-objects/test_pipeline.py | 28 -- .../tests/gh-actions/kf-objects/tfjob.yaml | 21 - .../tests/gh-actions/kind-cluster-1-24.yaml | 27 -- .../tests/gh-actions/kind-cluster-1-25.yaml | 26 -- .../tests/gh-actions/kind-cluster.yaml | 26 -- deployment/kustomization.yaml | 1 - .../tests/e2e/utils => tests}/__init__.py | 0 tests/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 170 bytes .../conftest.cpython-38-pytest-7.1.3.pyc | Bin 0 -> 2019 bytes ..._cluster_ready.cpython-38-pytest-7.1.3.pyc | Bin 0 -> 2682 bytes .../test_kfp.cpython-38-pytest-7.1.3.pyc | Bin 0 -> 4966 bytes .../test_mlflow.cpython-38-pytest-7.1.3.pyc | Bin 0 -> 5181 bytes .../test_registry.cpython-38-pytest-7.1.3.pyc | Bin 0 -> 2120 bytes tests/conftest.py | 36 ++ tests/requirements-tests.txt | 15 + .../create_user/create_user.sh | 95 +++++ .../create_user/csr.cnf.template | 15 + .../create_user/csr.yaml.template | 14 + .../create_user/kubeconfig.template | 17 + tests/resources/kfp/Dockerfile | 10 + tests/resources/kfp/__init__.py | 0 
tests/resources/kfp/build_image.sh | 19 + tests/resources/kfp/pipeline.yaml | 34 ++ tests/resources/kfp/requirements.txt | 2 + tests/resources/kfp/train.py | 22 + tests/resources/registry/Dockerfile | 10 + tests/resources/registry/build_push_image.sh | 21 + .../resources/registry/pipeline.yaml.template | 34 ++ tests/resources/registry/requirements.txt | 1 + tests/resources/registry/train.py | 8 + tests/test_cluster_ready.py | 53 +++ tests/test_kfp.py | 211 ++++++++++ tests/test_mlflow.py | 185 ++++++++ tests/test_registry.py | 60 +++ tests/wait_deployment_ready.py | 87 ++++ 77 files changed, 953 insertions(+), 2549 deletions(-) create mode 100644 config.env delete mode 100644 deployment/kubeflow/manifests/apps/kfp-tekton/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py delete mode 100644 deployment/kubeflow/manifests/apps/kfp-tekton/upstream/v1/base/installs/multi-user/pipelines-profile-controller/test_sync.py delete mode 100644 deployment/kubeflow/manifests/apps/pipeline/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py delete mode 100644 deployment/kubeflow/manifests/contrib/kserve/tests/test_sklearn.py delete mode 100644 deployment/kubeflow/manifests/proposals/20200913-rootlessKubeflow.md delete mode 100644 deployment/kubeflow/manifests/proposals/20220926-contrib-component-guidelines.md delete mode 100644 deployment/kubeflow/manifests/proposals/20230323-end-to-end-testing.md delete mode 100644 deployment/kubeflow/manifests/proposals/README.md delete mode 100644 deployment/kubeflow/manifests/tests/README.md delete mode 100644 deployment/kubeflow/manifests/tests/e2e/.gitignore delete mode 100644 deployment/kubeflow/manifests/tests/e2e/README.md delete mode 100755 deployment/kubeflow/manifests/tests/e2e/hack/cleanup_proxies.sh delete mode 100755 deployment/kubeflow/manifests/tests/e2e/hack/cleanup_yamls.sh delete mode 100755 deployment/kubeflow/manifests/tests/e2e/hack/proxy_istio.sh delete mode 100755 deployment/kubeflow/manifests/tests/e2e/hack/proxy_pipelines.sh delete mode 100644 deployment/kubeflow/manifests/tests/e2e/mnist.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/requirements.txt delete mode 100755 deployment/kubeflow/manifests/tests/e2e/runner.sh delete mode 100644 deployment/kubeflow/manifests/tests/e2e/settings.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/utils/isvc.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/utils/katib.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/utils/kserve.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/utils/tfjob.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/utils/watch.py delete mode 100644 deployment/kubeflow/manifests/tests/e2e/yamls/role.yaml delete mode 100644 deployment/kubeflow/manifests/tests/e2e/yamls/sa-role-binding.yaml delete mode 100644 deployment/kubeflow/manifests/tests/e2e/yamls/service-account.yaml delete mode 100644 deployment/kubeflow/manifests/tests/e2e/yamls/user-role-binding.yaml delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_argo_cli.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_cert_manager.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_istio-cni.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_istio.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_kind.sh delete mode 100755 
deployment/kubeflow/manifests/tests/gh-actions/install_knative-cni.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_knative.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_kserve.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_kustomize.sh delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/install_pipelines.sh delete mode 100644 deployment/kubeflow/manifests/tests/gh-actions/kf-objects/katib_test.yaml delete mode 100644 deployment/kubeflow/manifests/tests/gh-actions/kf-objects/kserve_test.yaml delete mode 100755 deployment/kubeflow/manifests/tests/gh-actions/kf-objects/test_pipeline.py delete mode 100644 deployment/kubeflow/manifests/tests/gh-actions/kf-objects/tfjob.yaml delete mode 100644 deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-24.yaml delete mode 100644 deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-25.yaml delete mode 100644 deployment/kubeflow/manifests/tests/gh-actions/kind-cluster.yaml rename {deployment/kubeflow/manifests/tests/e2e/utils => tests}/__init__.py (100%) create mode 100644 tests/__pycache__/__init__.cpython-38.pyc create mode 100644 tests/__pycache__/conftest.cpython-38-pytest-7.1.3.pyc create mode 100644 tests/__pycache__/test_cluster_ready.cpython-38-pytest-7.1.3.pyc create mode 100644 tests/__pycache__/test_kfp.cpython-38-pytest-7.1.3.pyc create mode 100644 tests/__pycache__/test_mlflow.cpython-38-pytest-7.1.3.pyc create mode 100644 tests/__pycache__/test_registry.cpython-38-pytest-7.1.3.pyc create mode 100644 tests/conftest.py create mode 100644 tests/requirements-tests.txt create mode 100755 tests/resources/access_management/create_user/create_user.sh create mode 100644 tests/resources/access_management/create_user/csr.cnf.template create mode 100644 tests/resources/access_management/create_user/csr.yaml.template create mode 100644 tests/resources/access_management/create_user/kubeconfig.template create mode 100644 tests/resources/kfp/Dockerfile create mode 100644 tests/resources/kfp/__init__.py create mode 100755 tests/resources/kfp/build_image.sh create mode 100644 tests/resources/kfp/pipeline.yaml create mode 100644 tests/resources/kfp/requirements.txt create mode 100644 tests/resources/kfp/train.py create mode 100644 tests/resources/registry/Dockerfile create mode 100755 tests/resources/registry/build_push_image.sh create mode 100644 tests/resources/registry/pipeline.yaml.template create mode 100644 tests/resources/registry/requirements.txt create mode 100644 tests/resources/registry/train.py create mode 100644 tests/test_cluster_ready.py create mode 100644 tests/test_kfp.py create mode 100644 tests/test_mlflow.py create mode 100644 tests/test_registry.py create mode 100644 tests/wait_deployment_ready.py diff --git a/config.env b/config.env new file mode 100644 index 0000000..7de3953 --- /dev/null +++ b/config.env @@ -0,0 +1,3 @@ +HOST_IP="127.0.0.1" +CLUSTER_NAME="kind-ep" +INSTALL_LOCAL_REGISTRY="true" diff --git a/deployment/kubeflow/manifests/apps/kfp-tekton/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py b/deployment/kubeflow/manifests/apps/kfp-tekton/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py deleted file mode 100644 index 50362d6..0000000 --- a/deployment/kubeflow/manifests/apps/kfp-tekton/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py +++ /dev/null @@ -1,286 +0,0 @@ -import os -from unittest import mock -import threading -from 
sync import get_settings_from_env, server_factory -import json - -import pytest -import requests - -# Data sets passed to server -DATA_INCORRECT_CHILDREN = { - "parent": { - "metadata": { - "labels": { - "pipelines.kubeflow.org/enabled": "true" - }, - "name": "myName" - } - }, - "children": { - "Secret.v1": [], - "ConfigMap.v1": [], - "Deployment.apps/v1": [], - "Service.v1": [], - "DestinationRule.networking.istio.io/v1alpha3": [], - "AuthorizationPolicy.security.istio.io/v1beta1": [], - } -} - -DATA_CORRECT_CHILDREN = { - "parent": { - "metadata": { - "labels": { - "pipelines.kubeflow.org/enabled": "true" - }, - "name": "myName" - } - }, - "children": { - "Secret.v1": [1], - "ConfigMap.v1": [1], - "Deployment.apps/v1": [1, 1], - "Service.v1": [1, 1], - "DestinationRule.networking.istio.io/v1alpha3": [1], - "AuthorizationPolicy.security.istio.io/v1beta1": [1], - } -} - -DATA_MISSING_PIPELINE_ENABLED = {"parent": {}, "children": {}} - -# Default values when environments are not explicit -DEFAULT_FRONTEND_IMAGE = "gcr.io/ml-pipeline/frontend" -DEFAULT_VISUALIZATION_IMAGE = "gcr.io/ml-pipeline/visualization-server" - -# Variables used for environment variable sets -VISUALIZATION_SERVER_IMAGE = "vis-image" -VISUALIZATION_SERVER_TAG = "somenumber.1.2.3" -FRONTEND_IMAGE = "frontend-image" -FRONTEND_TAG = "somehash" - -KFP_VERSION = "x.y.z" - -MINIO_ACCESS_KEY = "abcdef" -MINIO_SECRET_KEY = "uvwxyz" - -# "Environments" used in tests -ENV_VARIABLES_BASE = { - "MINIO_ACCESS_KEY": MINIO_ACCESS_KEY, - "MINIO_SECRET_KEY": MINIO_SECRET_KEY, - "CONTROLLER_PORT": "0", # HTTPServer randomly assigns the port to a free port -} - -ENV_KFP_VERSION_ONLY = dict(ENV_VARIABLES_BASE, - **{ - "KFP_VERSION": KFP_VERSION, - } - ) - -ENV_IMAGES_NO_TAGS = dict(ENV_VARIABLES_BASE, - **{ - "KFP_VERSION": KFP_VERSION, - "VISUALIZATION_SERVER_IMAGE": VISUALIZATION_SERVER_IMAGE, - "FRONTEND_IMAGE": FRONTEND_IMAGE, - } - ) - -ENV_IMAGES_WITH_TAGS = dict(ENV_VARIABLES_BASE, - **{ - "VISUALIZATION_SERVER_IMAGE": VISUALIZATION_SERVER_IMAGE, - "FRONTEND_IMAGE": FRONTEND_IMAGE, - "VISUALIZATION_SERVER_TAG": VISUALIZATION_SERVER_TAG, - "FRONTEND_TAG": FRONTEND_TAG, - } - ) - -ENV_IMAGES_WITH_TAGS_AND_ISTIO = dict(ENV_IMAGES_WITH_TAGS, - **{ - "DISABLE_ISTIO_SIDECAR": "false", - } - ) - - -def generate_image_name(imagename, tag): - return f"{str(imagename)}:{str(tag)}" - - -@pytest.fixture( - scope="function", -) -def sync_server(request): - """ - Starts the sync HTTP server for a given set of environment variables on a separate thread - - Yields: - * the server (useful to interrogate for the server address) - * environment variables (useful to interrogate for correct responses) - """ - environ = request.param - with mock.patch.dict(os.environ, environ): - # Create a server at an available port and serve it on a thread as a daemon - # This will result in a collection of servers being active - not a great way - # if this fixture is run many times during a test, but ok for now - settings = get_settings_from_env() - server = server_factory(**settings) - server_thread = threading.Thread(target=server.serve_forever) - # Put on daemon so it doesn't keep pytest from ending - server_thread.daemon = True - server_thread.start() - yield server, environ - - -@pytest.fixture( - scope="function", -) -def sync_server_from_arguments(request): - """ - Starts the sync HTTP server for a given set of parameters passed as arguments, with server on a separate thread - - Yields: - * the server (useful to interrogate for the server address) - * 
environment variables (useful to interrogate for correct responses) - """ - environ = {k.lower(): v for k, v in request.param.items()} - settings = environ - server = server_factory(**settings) - server_thread = threading.Thread(target=server.serve_forever) - # Put on daemon so it doesn't keep pytest from ending - server_thread.daemon = True - server_thread.start() - yield server, environ - - -@pytest.mark.parametrize( - "sync_server, data, expected_status, expected_visualization_server_image, expected_frontend_server_image", - [ - ( - ENV_KFP_VERSION_ONLY, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(DEFAULT_VISUALIZATION_IMAGE, KFP_VERSION), - generate_image_name(DEFAULT_FRONTEND_IMAGE, KFP_VERSION), - ), - ( - ENV_IMAGES_NO_TAGS, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(ENV_IMAGES_NO_TAGS["VISUALIZATION_SERVER_IMAGE"], KFP_VERSION), - generate_image_name(ENV_IMAGES_NO_TAGS["FRONTEND_IMAGE"], KFP_VERSION), - ), - ( - ENV_IMAGES_WITH_TAGS, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ( - ENV_IMAGES_WITH_TAGS, - DATA_CORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "True"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ], - indirect=["sync_server"] -) -def test_sync_server_with_pipeline_enabled(sync_server, data, expected_status, - expected_visualization_server_image, expected_frontend_server_image): - """ - Nearly end-to-end test of how Controller serves .sync as a POST - - Tests case where metadata.labels.pipelines.kubeflow.org/enabled exists, and thus - we should produce children - - Only does spot checks on children to see if key properties are correct - """ - server, environ = sync_server - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - print("url: ", url) - print("data") - print(json.dumps(data)) - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - - # Poke a few children to test things that can vary by environment variable - assert results['children'][1]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_visualization_server_image - assert results['children'][5]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_frontend_server_image - - -@pytest.mark.parametrize( - "sync_server_from_arguments, data, expected_status, expected_visualization_server_image, " - "expected_frontend_server_image", - [ - ( - ENV_IMAGES_WITH_TAGS_AND_ISTIO, - DATA_CORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "True"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ], - indirect=["sync_server_from_arguments"] -) -def test_sync_server_with_direct_passing_of_settings( - sync_server_from_arguments, data, expected_status, 
expected_visualization_server_image, - expected_frontend_server_image): - """ - Nearly end-to-end test of how Controller serves .sync as a POST, taking variables as arguments - - Only does spot checks on children to see if key properties are correct - """ - server, environ = sync_server_from_arguments - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - print("url: ", url) - print("data") - print(json.dumps(data)) - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - - # Poke a few children to test things that can vary by environment variable - assert results['children'][1]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_visualization_server_image - assert results['children'][5]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_frontend_server_image - - -@pytest.mark.parametrize( - "sync_server, data, expected_status, expected_children", - [ - (ENV_IMAGES_WITH_TAGS, DATA_MISSING_PIPELINE_ENABLED, {}, []), - ], - indirect=["sync_server"] -) -def test_sync_server_without_pipeline_enabled(sync_server, data, expected_status, - expected_children): - """ - Nearly end-to-end test of how Controller serves .sync as a POST - - Tests case where metadata.labels.pipelines.kubeflow.org/enabled does not - exist and thus server returns an empty reply - """ - server, environ = sync_server - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - assert results['children'] == expected_children diff --git a/deployment/kubeflow/manifests/apps/kfp-tekton/upstream/v1/base/installs/multi-user/pipelines-profile-controller/test_sync.py b/deployment/kubeflow/manifests/apps/kfp-tekton/upstream/v1/base/installs/multi-user/pipelines-profile-controller/test_sync.py deleted file mode 100644 index 6158e3f..0000000 --- a/deployment/kubeflow/manifests/apps/kfp-tekton/upstream/v1/base/installs/multi-user/pipelines-profile-controller/test_sync.py +++ /dev/null @@ -1,286 +0,0 @@ -import os -from unittest import mock -import threading -from sync import get_settings_from_env, server_factory -import json - -import pytest -import requests - -# Data sets passed to server -DATA_INCORRECT_CHILDREN = { - "parent": { - "metadata": { - "labels": { - "pipelines.kubeflow.org/enabled": "true" - }, - "name": "myName" - } - }, - "children": { - "Secret.v1": [], - "ConfigMap.v1": [], - "Deployment.apps/v1": [], - "Service.v1": [], - "DestinationRule.networking.istio.io/v1alpha3": [], - "AuthorizationPolicy.security.istio.io/v1beta1": [], - } -} - -DATA_CORRECT_CHILDREN = { - "parent": { - "metadata": { - "labels": { - "pipelines.kubeflow.org/enabled": "true" - }, - "name": "myName" - } - }, - "children": { - "Secret.v1": [1], - "ConfigMap.v1": [1], - "Deployment.apps/v1": [1, 1], - "Service.v1": [1, 1], - "DestinationRule.networking.istio.io/v1alpha3": [1], - "AuthorizationPolicy.security.istio.io/v1beta1": [1], - } -} - -DATA_MISSING_PIPELINE_ENABLED = {"parent": {}, "children": {}} - -# Default values when environments are not explicit -DEFAULT_FRONTEND_IMAGE = "gcr.io/ml-pipeline/frontend" -DEFAULT_VISUALIZATION_IMAGE = 
"gcr.io/ml-pipeline/visualization-server" - -# Variables used for environment variable sets -VISUALIZATION_SERVER_IMAGE = "vis-image" -VISUALIZATION_SERVER_TAG = "somenumber.1.2.3" -FRONTEND_IMAGE = "frontend-image" -FRONTEND_TAG = "somehash" - -KFP_VERSION = "x.y.z" - -MINIO_ACCESS_KEY = "abcdef" -MINIO_SECRET_KEY = "uvwxyz" - -# "Environments" used in tests -ENV_VARIABLES_BASE = { - "MINIO_ACCESS_KEY": MINIO_ACCESS_KEY, - "MINIO_SECRET_KEY": MINIO_SECRET_KEY, - "CONTROLLER_PORT": "0", # HTTPServer randomly assigns the port to a free port -} - -ENV_KFP_VERSION_ONLY = dict(ENV_VARIABLES_BASE, - **{ - "KFP_VERSION": KFP_VERSION, - } - ) - -ENV_IMAGES_NO_TAGS = dict(ENV_VARIABLES_BASE, - **{ - "KFP_VERSION": KFP_VERSION, - "VISUALIZATION_SERVER_IMAGE": VISUALIZATION_SERVER_IMAGE, - "FRONTEND_IMAGE": FRONTEND_IMAGE, - } - ) - -ENV_IMAGES_WITH_TAGS = dict(ENV_VARIABLES_BASE, - **{ - "VISUALIZATION_SERVER_IMAGE": VISUALIZATION_SERVER_IMAGE, - "FRONTEND_IMAGE": FRONTEND_IMAGE, - "VISUALIZATION_SERVER_TAG": VISUALIZATION_SERVER_TAG, - "FRONTEND_TAG": FRONTEND_TAG, - } - ) - -ENV_IMAGES_WITH_TAGS_AND_ISTIO = dict(ENV_IMAGES_WITH_TAGS, - **{ - "DISABLE_ISTIO_SIDECAR": "false", - } - ) - - -def generate_image_name(imagename, tag): - return f"{str(imagename)}:{str(tag)}" - - -@pytest.fixture( - scope="function", -) -def sync_server(request): - """ - Starts the sync HTTP server for a given set of environment variables on a separate thread - - Yields: - * the server (useful to interrogate for the server address) - * environment variables (useful to interrogate for correct responses) - """ - environ = request.param - with mock.patch.dict(os.environ, environ): - # Create a server at an available port and serve it on a thread as a daemon - # This will result in a collection of servers being active - not a great way - # if this fixture is run many times during a test, but ok for now - settings = get_settings_from_env() - server = server_factory(**settings) - server_thread = threading.Thread(target=server.serve_forever) - # Put on daemon so it doesn't keep pytest from ending - server_thread.daemon = True - server_thread.start() - yield server, environ - - -@pytest.fixture( - scope="function", -) -def sync_server_from_arguments(request): - """ - Starts the sync HTTP server for a given set of parameters passed as arguments, with server on a separate thread - - Yields: - * the server (useful to interrogate for the server address) - * environment variables (useful to interrogate for correct responses) - """ - environ = {k.lower(): v for k, v in request.param.items()} - settings = environ - server = server_factory(**settings) - server_thread = threading.Thread(target=server.serve_forever) - # Put on daemon so it doesn't keep pytest from ending - server_thread.daemon = True - server_thread.start() - yield server, environ - - -@pytest.mark.parametrize( - "sync_server, data, expected_status, expected_visualization_server_image, expected_frontend_server_image", - [ - ( - ENV_KFP_VERSION_ONLY, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(DEFAULT_VISUALIZATION_IMAGE, KFP_VERSION), - generate_image_name(DEFAULT_FRONTEND_IMAGE, KFP_VERSION), - ), - ( - ENV_IMAGES_NO_TAGS, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(ENV_IMAGES_NO_TAGS["VISUALIZATION_SERVER_IMAGE"], KFP_VERSION), - generate_image_name(ENV_IMAGES_NO_TAGS["FRONTEND_IMAGE"], KFP_VERSION), - ), - ( - ENV_IMAGES_WITH_TAGS, - DATA_INCORRECT_CHILDREN, - 
{"kubeflow-pipelines-ready": "False"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ( - ENV_IMAGES_WITH_TAGS, - DATA_CORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "True"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ], - indirect=["sync_server"] -) -def test_sync_server_with_pipeline_enabled(sync_server, data, expected_status, - expected_visualization_server_image, expected_frontend_server_image): - """ - Nearly end-to-end test of how Controller serves .sync as a POST - - Tests case where metadata.labels.pipelines.kubeflow.org/enabled exists, and thus - we should produce children - - Only does spot checks on children to see if key properties are correct - """ - server, environ = sync_server - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - print("url: ", url) - print("data") - print(json.dumps(data, indent=2)) - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - - # Poke a few children to test things that can vary by environment variable - assert results['children'][1]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_visualization_server_image - assert results['children'][5]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_frontend_server_image - - -@pytest.mark.parametrize( - "sync_server_from_arguments, data, expected_status, expected_visualization_server_image, " - "expected_frontend_server_image", - [ - ( - ENV_IMAGES_WITH_TAGS_AND_ISTIO, - DATA_CORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "True"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ], - indirect=["sync_server_from_arguments"] -) -def test_sync_server_with_direct_passing_of_settings( - sync_server_from_arguments, data, expected_status, expected_visualization_server_image, - expected_frontend_server_image): - """ - Nearly end-to-end test of how Controller serves .sync as a POST, taking variables as arguments - - Only does spot checks on children to see if key properties are correct - """ - server, environ = sync_server_from_arguments - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - print("url: ", url) - print("data") - print(json.dumps(data, indent=2)) - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - - # Poke a few children to test things that can vary by environment variable - assert results['children'][1]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_visualization_server_image - assert results['children'][5]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_frontend_server_image - - -@pytest.mark.parametrize( - "sync_server, data, expected_status, 
expected_children", - [ - (ENV_IMAGES_WITH_TAGS, DATA_MISSING_PIPELINE_ENABLED, {}, []), - ], - indirect=["sync_server"] -) -def test_sync_server_without_pipeline_enabled(sync_server, data, expected_status, - expected_children): - """ - Nearly end-to-end test of how Controller serves .sync as a POST - - Tests case where metadata.labels.pipelines.kubeflow.org/enabled does not - exist and thus server returns an empty reply - """ - server, environ = sync_server - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - assert results['children'] == expected_children diff --git a/deployment/kubeflow/manifests/apps/pipeline/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py b/deployment/kubeflow/manifests/apps/pipeline/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py deleted file mode 100644 index 50362d6..0000000 --- a/deployment/kubeflow/manifests/apps/pipeline/upstream/base/installs/multi-user/pipelines-profile-controller/test_sync.py +++ /dev/null @@ -1,286 +0,0 @@ -import os -from unittest import mock -import threading -from sync import get_settings_from_env, server_factory -import json - -import pytest -import requests - -# Data sets passed to server -DATA_INCORRECT_CHILDREN = { - "parent": { - "metadata": { - "labels": { - "pipelines.kubeflow.org/enabled": "true" - }, - "name": "myName" - } - }, - "children": { - "Secret.v1": [], - "ConfigMap.v1": [], - "Deployment.apps/v1": [], - "Service.v1": [], - "DestinationRule.networking.istio.io/v1alpha3": [], - "AuthorizationPolicy.security.istio.io/v1beta1": [], - } -} - -DATA_CORRECT_CHILDREN = { - "parent": { - "metadata": { - "labels": { - "pipelines.kubeflow.org/enabled": "true" - }, - "name": "myName" - } - }, - "children": { - "Secret.v1": [1], - "ConfigMap.v1": [1], - "Deployment.apps/v1": [1, 1], - "Service.v1": [1, 1], - "DestinationRule.networking.istio.io/v1alpha3": [1], - "AuthorizationPolicy.security.istio.io/v1beta1": [1], - } -} - -DATA_MISSING_PIPELINE_ENABLED = {"parent": {}, "children": {}} - -# Default values when environments are not explicit -DEFAULT_FRONTEND_IMAGE = "gcr.io/ml-pipeline/frontend" -DEFAULT_VISUALIZATION_IMAGE = "gcr.io/ml-pipeline/visualization-server" - -# Variables used for environment variable sets -VISUALIZATION_SERVER_IMAGE = "vis-image" -VISUALIZATION_SERVER_TAG = "somenumber.1.2.3" -FRONTEND_IMAGE = "frontend-image" -FRONTEND_TAG = "somehash" - -KFP_VERSION = "x.y.z" - -MINIO_ACCESS_KEY = "abcdef" -MINIO_SECRET_KEY = "uvwxyz" - -# "Environments" used in tests -ENV_VARIABLES_BASE = { - "MINIO_ACCESS_KEY": MINIO_ACCESS_KEY, - "MINIO_SECRET_KEY": MINIO_SECRET_KEY, - "CONTROLLER_PORT": "0", # HTTPServer randomly assigns the port to a free port -} - -ENV_KFP_VERSION_ONLY = dict(ENV_VARIABLES_BASE, - **{ - "KFP_VERSION": KFP_VERSION, - } - ) - -ENV_IMAGES_NO_TAGS = dict(ENV_VARIABLES_BASE, - **{ - "KFP_VERSION": KFP_VERSION, - "VISUALIZATION_SERVER_IMAGE": VISUALIZATION_SERVER_IMAGE, - "FRONTEND_IMAGE": FRONTEND_IMAGE, - } - ) - -ENV_IMAGES_WITH_TAGS = dict(ENV_VARIABLES_BASE, - **{ - "VISUALIZATION_SERVER_IMAGE": VISUALIZATION_SERVER_IMAGE, - "FRONTEND_IMAGE": FRONTEND_IMAGE, - "VISUALIZATION_SERVER_TAG": VISUALIZATION_SERVER_TAG, - "FRONTEND_TAG": FRONTEND_TAG, - } - ) - -ENV_IMAGES_WITH_TAGS_AND_ISTIO = 
dict(ENV_IMAGES_WITH_TAGS, - **{ - "DISABLE_ISTIO_SIDECAR": "false", - } - ) - - -def generate_image_name(imagename, tag): - return f"{str(imagename)}:{str(tag)}" - - -@pytest.fixture( - scope="function", -) -def sync_server(request): - """ - Starts the sync HTTP server for a given set of environment variables on a separate thread - - Yields: - * the server (useful to interrogate for the server address) - * environment variables (useful to interrogate for correct responses) - """ - environ = request.param - with mock.patch.dict(os.environ, environ): - # Create a server at an available port and serve it on a thread as a daemon - # This will result in a collection of servers being active - not a great way - # if this fixture is run many times during a test, but ok for now - settings = get_settings_from_env() - server = server_factory(**settings) - server_thread = threading.Thread(target=server.serve_forever) - # Put on daemon so it doesn't keep pytest from ending - server_thread.daemon = True - server_thread.start() - yield server, environ - - -@pytest.fixture( - scope="function", -) -def sync_server_from_arguments(request): - """ - Starts the sync HTTP server for a given set of parameters passed as arguments, with server on a separate thread - - Yields: - * the server (useful to interrogate for the server address) - * environment variables (useful to interrogate for correct responses) - """ - environ = {k.lower(): v for k, v in request.param.items()} - settings = environ - server = server_factory(**settings) - server_thread = threading.Thread(target=server.serve_forever) - # Put on daemon so it doesn't keep pytest from ending - server_thread.daemon = True - server_thread.start() - yield server, environ - - -@pytest.mark.parametrize( - "sync_server, data, expected_status, expected_visualization_server_image, expected_frontend_server_image", - [ - ( - ENV_KFP_VERSION_ONLY, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(DEFAULT_VISUALIZATION_IMAGE, KFP_VERSION), - generate_image_name(DEFAULT_FRONTEND_IMAGE, KFP_VERSION), - ), - ( - ENV_IMAGES_NO_TAGS, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(ENV_IMAGES_NO_TAGS["VISUALIZATION_SERVER_IMAGE"], KFP_VERSION), - generate_image_name(ENV_IMAGES_NO_TAGS["FRONTEND_IMAGE"], KFP_VERSION), - ), - ( - ENV_IMAGES_WITH_TAGS, - DATA_INCORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "False"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ( - ENV_IMAGES_WITH_TAGS, - DATA_CORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "True"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ], - indirect=["sync_server"] -) -def test_sync_server_with_pipeline_enabled(sync_server, data, expected_status, - expected_visualization_server_image, expected_frontend_server_image): - """ - Nearly end-to-end test of how Controller serves .sync as a POST - - Tests case where metadata.labels.pipelines.kubeflow.org/enabled exists, and thus - we should produce children - - Only does spot checks on children to see if key properties are correct - """ - server, environ = sync_server - - # server.server_address = (url, port_as_integer) 
- url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - print("url: ", url) - print("data") - print(json.dumps(data)) - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - - # Poke a few children to test things that can vary by environment variable - assert results['children'][1]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_visualization_server_image - assert results['children'][5]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_frontend_server_image - - -@pytest.mark.parametrize( - "sync_server_from_arguments, data, expected_status, expected_visualization_server_image, " - "expected_frontend_server_image", - [ - ( - ENV_IMAGES_WITH_TAGS_AND_ISTIO, - DATA_CORRECT_CHILDREN, - {"kubeflow-pipelines-ready": "True"}, - generate_image_name(ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_IMAGE"], - ENV_IMAGES_WITH_TAGS["VISUALIZATION_SERVER_TAG"]), - generate_image_name(ENV_IMAGES_WITH_TAGS["FRONTEND_IMAGE"], ENV_IMAGES_WITH_TAGS["FRONTEND_TAG"]), - ), - ], - indirect=["sync_server_from_arguments"] -) -def test_sync_server_with_direct_passing_of_settings( - sync_server_from_arguments, data, expected_status, expected_visualization_server_image, - expected_frontend_server_image): - """ - Nearly end-to-end test of how Controller serves .sync as a POST, taking variables as arguments - - Only does spot checks on children to see if key properties are correct - """ - server, environ = sync_server_from_arguments - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - print("url: ", url) - print("data") - print(json.dumps(data)) - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - - # Poke a few children to test things that can vary by environment variable - assert results['children'][1]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_visualization_server_image - assert results['children'][5]["spec"]["template"]["spec"]["containers"][0][ - "image"] == expected_frontend_server_image - - -@pytest.mark.parametrize( - "sync_server, data, expected_status, expected_children", - [ - (ENV_IMAGES_WITH_TAGS, DATA_MISSING_PIPELINE_ENABLED, {}, []), - ], - indirect=["sync_server"] -) -def test_sync_server_without_pipeline_enabled(sync_server, data, expected_status, - expected_children): - """ - Nearly end-to-end test of how Controller serves .sync as a POST - - Tests case where metadata.labels.pipelines.kubeflow.org/enabled does not - exist and thus server returns an empty reply - """ - server, environ = sync_server - - # server.server_address = (url, port_as_integer) - url = f"http://{server.server_address[0]}:{str(server.server_address[1])}" - x = requests.post(url, data=json.dumps(data)) - results = json.loads(x.text) - - # Test overall status of whether children are ok - assert results['status'] == expected_status - assert results['children'] == expected_children diff --git a/deployment/kubeflow/manifests/contrib/kserve/tests/test_sklearn.py b/deployment/kubeflow/manifests/contrib/kserve/tests/test_sklearn.py deleted file mode 100644 index 2c17257..0000000 --- a/deployment/kubeflow/manifests/contrib/kserve/tests/test_sklearn.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Licensed under the Apache License, Version 
2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from kubernetes import client -from kubernetes.client import V1ResourceRequirements - -from kserve import ( - constants, - KServeClient, - V1beta1InferenceService, - V1beta1InferenceServiceSpec, - V1beta1PredictorSpec, - V1beta1SKLearnSpec, -) -from utils import KSERVE_TEST_NAMESPACE -from utils import predict - - -def test_sklearn_kserve(): - service_name = "isvc-sklearn" - predictor = V1beta1PredictorSpec( - min_replicas=1, - sklearn=V1beta1SKLearnSpec( - storage_uri="gs://kfserving-examples/models/sklearn/1.0/model", - resources=V1ResourceRequirements( - requests={"cpu": "50m", "memory": "128Mi"}, - limits={"cpu": "100m", "memory": "256Mi"}, - ), - ), - ) - - isvc = V1beta1InferenceService( - api_version=constants.KSERVE_V1BETA1, - kind=constants.KSERVE_KIND, - metadata=client.V1ObjectMeta( - name=service_name, namespace=KSERVE_TEST_NAMESPACE - ), - spec=V1beta1InferenceServiceSpec(predictor=predictor), - ) - - kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config")) - kserve_client.create(isvc) - kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE) - res = predict(service_name, "./data/iris_input.json") - assert res["predictions"] == [1, 1] - kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE) diff --git a/deployment/kubeflow/manifests/in-cluster-setup/kustomization.yaml b/deployment/kubeflow/manifests/in-cluster-setup/kustomization.yaml index 3f21d83..1dfefc7 100644 --- a/deployment/kubeflow/manifests/in-cluster-setup/kustomization.yaml +++ b/deployment/kubeflow/manifests/in-cluster-setup/kustomization.yaml @@ -48,7 +48,7 @@ resources: - ../common/knative/knative-eventing/base - ../common/istio-1-17/cluster-local-gateway/base # Kubeflow namespace -#- ../common/kubeflow-namespace/base +- ../common/kubeflow-namespace/base # Kubeflow Roles - ../common/kubeflow-roles/base # Kubeflow Istio Resources diff --git a/deployment/kubeflow/manifests/proposals/20200913-rootlessKubeflow.md b/deployment/kubeflow/manifests/proposals/20200913-rootlessKubeflow.md deleted file mode 100644 index ef7a77c..0000000 --- a/deployment/kubeflow/manifests/proposals/20200913-rootlessKubeflow.md +++ /dev/null @@ -1,50 +0,0 @@ -# Rootless Kubeflow - -Authors: Julius von Kohout (@juliusvonkohout) - -### Goals - -We want to run Kubeflow as rootless as possible, according to CNCF/Kubernetes best practices. -Most enterprise environments will require this as well. - -### Implementation details -The main steps are adding an additional profile for istio-cni and later the ambient mesh, and updating the documentation and manifest generation process. -Only istio-cni or the istio ambient mesh can run rootless, as explained here: https://istio.io/latest/docs/setup/additional-setup/cni/. -Istio-cni will still need a daemonset in kube-system, but that is completely isolated from user workloads. -The ambient mesh should get rid of this as well and also has the benefit of removing the istio init containers and sidecars altogether.
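For orientation, the istio-cni option above reduces to a pair of `istioctl` flags (the same values are quoted in the staged plan below); a minimal sketch, assuming a plain istioctl-based install rather than the final generated manifests:

```shell
# Sketch: install Istio with the CNI node agent enabled, so sidecar
# injection no longer needs privileged init containers in user pods.
istioctl install \
  --set components.cni.enabled=true \
  --set components.cni.namespace=kube-system
```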
-Then we add the baseline and restricted PSS as a kustomize component to `/contrib` and extend the profile controller to annotate user namespaces with configurable PSS labels. - -We want to use a staged approach. - -#### First stage: -1. Implement Istio 1.17.5 and use it by default, because 1.17 is what we have planned to use for Kubeflow 1.8. -2. Implement istio-cni (`--set components.cni.enabled=true --set components.cni.namespace=kube-system`) as a second option. -3. Add simple tests similar to `tests/gh-actions/install_istio.sh` and `tests/gh-actions/install_knative.sh` for istio-cni, support both rootful and rootless istio at the same time, and give users one release to test - -#### Second stage: -4. Add pod security standards (https://kubernetes.io/docs/concepts/security/pod-security-standards/) `base/restricted` to `manifests/contrib` -5. Enforce PSS baseline (advanced users can still build OCI containers via Podman and buildah, but not Docker in Docker). The baseline PSS should work with any istio; if not, we will move this item to the third stage, after istio-cni or the ambient mesh is the default. -6. Enable warnings for violations of restricted PSS -7. Add tests to make sure that the PSS are used and tested in the CI/CD -8. Optionally enforce PSS restricted (this is where minor corner cases are affected) - -#### Third stage: -9. Upgrade Istio to 1.19 to make the ambient mesh available -10. Add istio-ambient as an option to the next Kubeflow release. - -#### Fourth stage: -11. Use the ambient service mesh by default in Kubeflow 1.10. - -### Non-Goals -This does not cover application-level CVEs, only cluster-level security. - -### Does this break any existing functionality? -So far, no. Only PSS restricted may block the security-wise dangerous Docker in Docker. -This is a rarely used feature of the KFP SDK. -With PSS baseline you can still build OCI images with Podman, for example. -We should replace Docker with the CLI-compatible Podman in the KFP SDK: https://kubeflow-pipelines.readthedocs.io/en/1.8.22/source/kfp.containers.html?highlight=kfp.containers.build_image_from_working_dir#kfp.containers.build_image_from_working_dir. - - -### Does this fix/solve any outstanding issues? -This proposal enables Kubeflow to implement parts of the Kubernetes best practices and improves its usability in enterprise and regulated environments. -The progress is tracked in https://github.com/kubeflow/manifests/issues/2528 diff --git a/deployment/kubeflow/manifests/proposals/20220926-contrib-component-guidelines.md b/deployment/kubeflow/manifests/proposals/20220926-contrib-component-guidelines.md deleted file mode 100644 index 379937e..0000000 --- a/deployment/kubeflow/manifests/proposals/20220926-contrib-component-guidelines.md +++ /dev/null @@ -1,65 +0,0 @@ -# Guidelines for /contrib Components - -**Authors**: Kimonas Sotirchos kimwnasptd@arrikto.com - -The motivation behind this proposal is to fully document the expectations and -requirements that components under `/contrib` should satisfy. This will make it -clearer how to use a component, how it integrates with Kubeflow, the problems -it tries to solve, and its dependency versions.
- -## Goals - -* Document the requirements that components under `/contrib` should satisfy -* Introduce a process for deprecating unmaintained components - -## Non-Goals - -* Get into the discussion of which components are considered "Kubeflow" components - * The assumption until now is that components under the Kubeflow GitHub Org live - under the `/apps` dir, and all others under `/contrib` -* Provide a migration plan for components to move out from `/contrib` -* Modify the [`example/kustomization.yaml`](https://github.com/kubeflow/manifests/blob/master/example/kustomization.yaml) with new components - -## Proposal - -### Component Requirements - -Components living under `/contrib` should satisfy some strict requirements to -ensure they are always usable by end users and ship with complementary documentation. - -These are the requirements for all components under `/contrib`: -1. There must be a `README.md` file that documents: - * Instructions on how someone can install the component in a Kubeflow cluster - * Since the Kubeflow manifests have standardized on [Kustomize](https://kustomize.io/), - we expect all manifests to be kustomize packages - * How to use the component as part of Kubeflow (examples) - * The problems it tries to solve and the value it brings - * Links to the official documentation of the component -2. There must be an OWNERS file with at least 2 users -3. The component must work with the latest version of Kubeflow, and its - dependencies -4. There must be an `UPGRADE.md` file that documents any instructions users need - to follow when applying manifests of a newer version -5. There needs to be sufficient work on testing - * There must be a script file [python, bash etc] that verifies the component - is working as expected. This can be something very simple, like submitting a - CustomResource and waiting for it to become Ready - * The maintainers will need to work with the leads of the Manifests WG to ensure - there's some basic automation in place that will be running the above script(s) - -At this point we don't want to impose too strict a structure for the -README. Developers are free to expose any other information in the README that -they see fit, as long as the above info is exposed. - - -### Deprecation plan - -The proposed criteria for deciding that a component should be deprecated are: -1. The component cannot be installed in the minimum K8s version supported by Kubeflow -2. The manifests are not working as expected and result in undeployable Pods -3. The documented examples do not work as expected - -If a component meets all the above criteria then it will initially be marked as -UNMAINTAINED/OUT-OF-DATE in the component's README. Then, if a whole Kubeflow release -cycle concludes and the component is still in the UNMAINTAINED/OUT-OF-DATE phase, -without any feedback from the OWNERS, it will be removed.
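The testing requirement above (item 5) is intentionally lightweight; a minimal sketch of such a verification script, assuming a hypothetical component CustomResource in `example-cr.yaml` and a placeholder namespace, could be:

```shell
#!/usr/bin/env bash
# Hypothetical verifier for a /contrib component: submit a CustomResource
# and wait for it to become Ready, failing the CI step on timeout.
set -euo pipefail

NAMESPACE="contrib-component-test"   # placeholder namespace
kubectl apply -n "$NAMESPACE" -f example-cr.yaml
kubectl wait -n "$NAMESPACE" -f example-cr.yaml \
  --for=condition=Ready --timeout=300s
```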
diff --git a/deployment/kubeflow/manifests/proposals/20230323-end-to-end-testing.md b/deployment/kubeflow/manifests/proposals/20230323-end-to-end-testing.md deleted file mode 100644 index 81bcaff..0000000 --- a/deployment/kubeflow/manifests/proposals/20230323-end-to-end-testing.md +++ /dev/null @@ -1,395 +0,0 @@ -# End-to-end Testing - -**Authors**: Dominik Fleischmann ([@domFleischmann](https://github.com/domFleischmann)), Kimonas -Sotirchos ([@kimwnasptd](https://github.com/kimwnasptd)), and Anna Jung ([@annajung](https://github.com/annajung)) - -## Background - -Previously, the Kubeflow community leveraged the prow optional-test-infra for e2e testing with credits from AWS. After the -optional test infrastructure deprecation notice, all WGs moved their tests to GitHub Actions as a temporary solution. Due -to the resource constraints of GitHub-hosted runners, the Kubeflow community stopped supporting e2e tests as part of the -migration. In partnership with Amazon, a new AWS account has been created with sponsored credits. With the new AWS -account, the Kubeflow community is no longer limited by the resource constraints posed by GitHub Actions. To enable e2e -testing for the Manifest repo, this doc proposes a design to set up the infrastructure needed to run the necessary tests. - -References - -- [Optional Test Infra Deprecation Notice](https://github.com/kubeflow/testing/issues/993) -- [Alternative solution to removal of test on optional-test-infra](https://github.com/kubeflow/testing/issues/1006) - -## Goal - -Enable e2e testing for the Manifest repo and leverage it to shorten the manifest testing phase of the Kubeflow -release cycle and to increase the quality of Kubeflow releases by ensuring Kubeflow components and dependencies work -correctly together. - -## Proposal - -After some initial conversations, it has been agreed to create integration tests based on GitHub Actions, which will -spawn an EC2 instance with enough resources to deploy the complete Kubeflow solution and run some end-to-end testing. - -## Implementation - -Below are the steps the GitHub Actions workflow will perform to complete end-to-end testing - -- [Create credentials required by AWS](#create-credentials-required-by-aws) -- [Create an EC2 instance](#create-an-ec2-instance) -- [Install Kubernetes on the instance](#install-kubernetes-on-the-instance) -- [Deploy Kubeflow](#deploy-kubeflow) -- [Run tests](#run-tests) -- [Log and report errors](#log-and-report-errors) -- [Clean up](#clean-up) - -### Create credentials required by AWS - -To leverage AWS, two credentials are required: - -- `AWS_ACCESS_KEY_ID`: Specifies an AWS access key associated with an IAM user or role. -- `AWS_SECRET_ACCESS_KEY`: Specifies the secret key associated with the access key. This is essentially the "password" - for the access key. - -Both credentials need to -be [stored as secrets on GitHub](https://docs.github.com/en/actions/security-guides/encrypted-secrets) -and will be accessed in a workflow as environment variables. - -```yaml -env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} -``` - -### Create an EC2 instance - -Access the AWS credentials (stored as GH Secrets) and create an EC2 instance. - -Using [juju](https://juju.is/) for orchestration, configure AWS credentials and deploy an EC2 instance with the -following configuration: - -- Image: Ubuntu Server (latest) -- Type: t3a.xlarge -- Root disk: 80G -- Region: us-east-1 (default) -
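As a sketch, this provisioning step could reduce to a couple of juju commands, assuming AWS credentials are already registered with juju and using the configuration listed above (exact syntax should be checked against the juju documentation):

```shell
# Bootstrap a juju controller on AWS in the default region, then add a
# machine matching the instance shape listed above.
juju bootstrap aws/us-east-1 e2e-controller
juju add-machine --constraints "instance-type=t3a.xlarge root-disk=80G"
```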
-#### Why juju? - -Juju allows easy configuration of various cloud providers. If a reason to shift to another -infrastructure provider arises in the future, it would allow us to pivot quickly. - -While juju provides more capability, the proposal is to use the tool for configuration management and as a medium to deploy and -connect to EC2 instances. - -**Note**: Using GitHub Secrets to store AWS credentials will not allow any forked repositories to access the secrets. - -### Install Kubernetes on the Instance - -Install Kubernetes on the EC2 instance where Kubeflow will be deployed and tested. - -To install Kubernetes, we explored two options and propose to use **KinD**: - -- [Microk8s](#microk8s) -- [KinD](#kind) - -#### KinD - -Using KinD, install Kubernetes with the existing KinD configuration managed by the Manifest WG. - -```shell -# Install dependencies - docker -sudo apt update -sudo apt install -y apt-transport-https ca-certificates curl software-properties-common tar -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - -sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu focal stable" -apt-cache policy docker-ce -sudo apt install -y docker-ce -sudo systemctl status docker -sudo usermod -a -G docker ubuntu - -# Install dependencies - kubectl -sudo curl -L "https://storage.googleapis.com/kubernetes-release/release/`curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl" -o /usr/local/bin/kubectl -sudo chmod +x /usr/local/bin/kubectl -kubectl version --short --client - -# Install KinD -curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.17.0/kind-linux-amd64 -chmod +x ./kind -sudo mv ./kind /usr/local/bin/kind - -# Deploy kubernetes using KinD -cd manifests -kind create cluster --config ./tests/gh-actions/kind-cluster.yaml -``` - -##### Why KinD? - -While many tools can be leveraged to deploy Kubernetes, the Manifest WG already leverages KinD to run both core and contrib -component tests. By reusing the tool, we can leverage the existing KinD configuration and keep component and e2e testing similar. - -**Note**: KinD is a subproject of Kubernetes but does not automatically release with a new Kubernetes version and does -not follow the Kubernetes release cadence. More details can be found at -[kind/issue#197](https://github.com/kubernetes-sigs/kind/issues/197). - -### Deploy Kubeflow - -Deploy Kubeflow in the same manner the Manifests WG documents. - -Copy the manifest repo to the AWS instance and use Kustomize to run the Kubeflow installation. After the Kustomize -installation is complete, verify all pods are running. - -The manifest installation may result in an infinite while loop; therefore, a time limit of 45 minutes should be set to ensure the -installation exits when a problem occurs with the Kubeflow installation.
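A minimal sketch of such a bounded installation, assuming the stock `example` kustomization from kubeflow/manifests and the 45-minute ceiling mentioned above:

```shell
# Apply the manifests in the documented retry loop, but bound the whole
# install so a broken component fails the job instead of hanging it.
cd manifests
timeout 45m bash -c \
  'while ! kustomize build example | kubectl apply -f -; do
     echo "Retrying to apply resources"; sleep 10
   done'
# Afterwards, verify that all pods come up.
kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s
```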
 -### Run Tests - -Execute integration tests to verify the correct functioning of different features using Python scripts and Jupyter -notebooks. - -As the first iteration, test the Kubeflow integration using the -existing [e2e mnist python script](https://github.com/kubeflow/manifests/tree/master/tests/e2e) -and [e2e mnist notebook](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/kubeflow-e2e-mnist/kubeflow-e2e-mnist.ipynb): - -- [Python script](#python-script) -- [Jupyter notebook](#jupyter-notebook) - -Both the Python script and the notebook test the following: - -- KFP and Katib SDK packages (compatibility with other Python packages) -- Creation and execution of a pipeline from a user namespace -- Creation and execution of hyperparameter tuning with Katib from a user namespace -- Creation and execution of distributed training with TFJob from a user namespace -- Creation and execution of inference using KServe from a user namespace - -**Note**: The mnist notebook does not test the Kubeflow Notebook resources. In the future, additional verification and -tests should be added to cover various Kubeflow components and features. - -#### Python script - -Steps to run the e2e Python script from the workflow: - -1. Convert the e2e mnist notebook to a Python script (reuse [mnist.py](https://github.com/kubeflow/manifests/blob/master/tests/e2e/mnist.py)) -2. Run the mnist Python script outside of the cluster (reuse [runner.sh](https://github.com/kubeflow/manifests/blob/master/tests/e2e/runner.sh)) - -#### Jupyter notebook - -Steps to run the e2e notebook from the workflow: - -1. Get the e2e mnist notebook - 1. To run the existing e2e mnist notebook, a modification needs to be made in the last step to wait for the triggered - run to finish before executing the prediction. The proposed changes are shown below; a pull request will need to be - made in the future to avoid copying the mnist notebook into the manifest directory. - - ```python - import numpy as np - import time - from PIL import Image - import requests - - # The pipeline run should succeed. - run_status = kfp_client.get_run(run_id=run_id).run.status - - if run_status is None: - print("Waiting for the Run {} to start".format(run_id)) - time.sleep(60) - run_status = kfp_client.get_run(run_id=run_id).run.status - - while run_status == "Running": - print("Run {} is in progress".format(run_id)) - time.sleep(60) - run_status = kfp_client.get_run(run_id=run_id).run.status - - if run_status == "Succeeded": - print("Run {} has Succeeded\n".format(run_id)) - - # Specify the image URL here. - image_url = "https://raw.githubusercontent.com/kubeflow/katib/master/examples/v1beta1/kubeflow-pipelines/images/9.bmp" - image = Image.open(requests.get(image_url, stream=True).raw) - data = np.array(image.convert('L').resize((28, 28))).astype(float).reshape(-1, 28, 28, 1) - data_formatted = np.array2string(data, separator=",", formatter={"float": lambda x: "%.1f" % x}) - json_request = '{{ "instances" : {} }}'.format(data_formatted) - - # Specify the prediction URL. If you are running this notebook outside of the Kubernetes cluster, you should set the Cluster IP. - url = "http://{}-predictor-default.{}.svc.cluster.local/v1/models/{}:predict".format(name, namespace, name) - - time.sleep(60) - response = requests.post(url, data=json_request) - - print("Prediction for the image") - display(image) - print(response.json()) - else: - raise Exception("Run {} failed with status {}\n".format(run_id, kfp_client.get_run(run_id=run_id).run.status)) - ``` -2. Move the mnist notebook into the cluster - ```shell - kubectl -n kubeflow-user-example-com create configmap e2e-test --from-file kubeflow-e2e-mnist.ipynb - ``` -3. 
-3. Create a PodDefault to allow access to Kubeflow Pipelines
-   ```yaml
-   apiVersion: kubeflow.org/v1alpha1
-   kind: PodDefault
-   metadata:
-     name: access-ml-pipeline
-     namespace: kubeflow-user-example-com
-   spec:
-     desc: Allow access to Kubeflow Pipelines
-     selector:
-       matchLabels:
-         access-ml-pipeline: "true"
-     env:
-       - ## this environment variable is automatically read by `kfp.Client()`
-         ## this is the default value, but we show it here for clarity
-         name: KF_PIPELINES_SA_TOKEN_PATH
-         value: /var/run/secrets/kubeflow/pipelines/token
-     volumes:
-       - name: volume-kf-pipeline-token
-         projected:
-           sources:
-             - serviceAccountToken:
-                 path: token
-                 expirationSeconds: 7200
-                 ## defined by the `TOKEN_REVIEW_AUDIENCE` environment variable on the `ml-pipeline` deployment
-                 audience: pipelines.kubeflow.org
-     volumeMounts:
-       - mountPath: /var/run/secrets/kubeflow/pipelines
-         name: volume-kf-pipeline-token
-         readOnly: true
-   ```
-4. Run the notebook programmatically using a Kubernetes Job (or Notebook) resource
-   ```yaml
-   apiVersion: batch/v1
-   kind: Job
-   metadata:
-     name: test-notebook-job
-     namespace: kubeflow-user-example-com
-   spec:
-     backoffLimit: 1
-     activeDeadlineSeconds: 1200
-     template:
-       metadata:
-         labels:
-           access-ml-pipeline: "true"
-       spec:
-         restartPolicy: Never
-         initContainers:
-           - name: copy-notebook
-             image: busybox
-             command: ['sh', '-c', 'cp /scripts/* /etc/kubeflow-e2e/']
-             volumeMounts:
-               - name: e2e-test
-                 mountPath: /scripts
-               - name: kubeflow-e2e
-                 mountPath: /etc/kubeflow-e2e
-         containers:
-           - image: kubeflownotebookswg/jupyter-scipy:v1.6.1
-             imagePullPolicy: IfNotPresent
-             name: execute-notebook
-             command:
-               - /bin/sh
-               - -c
-               - |
-                 jupyter nbconvert --to notebook --execute /etc/kubeflow-e2e/kubeflow-e2e-mnist.ipynb;
-                 x=$(echo $?); curl -fsI -X POST http://localhost:15020/quitquitquit && exit $x;
-             volumeMounts:
-               - name: kubeflow-e2e
-                 mountPath: /etc/kubeflow-e2e
-         serviceAccountName: default-editor
-         volumes:
-           - name: e2e-test
-             configMap:
-               name: e2e-test
-           - name: kubeflow-e2e
-             emptyDir: {}
-   ```
-5. Verify whether the Job succeeded or failed
-   ```shell
-   kubectl -n kubeflow-user-example-com wait --for=condition=complete --timeout=1200s job/test-notebook-job
-   ```
-
-### Log and Report Errors
-
-Report logs generated on the EC2 instance back to GitHub Actions for users.
-
-For failures in the workflow steps, generate cluster inspect logs, pod logs, and `kubectl describe` output. Copy the
-generated logs back to the GitHub Actions system and use
-[actions/upload-artifact@v2](https://github.com/actions/upload-artifact) to allow users to access the logs when
-necessary.
-
-**Note**: By default, artifacts are retained for 90 days. The number of retention days is configurable.
-
-### Clean Up
-
-Regardless of the success or failure of the workflow, the EC2 instance is deleted at the end of the workflow to ensure
-there are no resources left behind.
-
-## Debugging
-
-To debug any failed step of the GitHub Actions workflow,
-[debugging with ssh](https://github.com/marketplace/actions/debugging-with-ssh) or other similar tools can be used to
-SSH into the GitHub runner. From the runner, Juju can be used to connect to the AWS EC2 instance.
-
-**Notes**:
-
-- GitHub secrets are limited to the Manifests repo and do not cascade to forked repositories. To debug, users must set
-  up their own AWS secrets.
-- To debug the AWS EC2 instance without SSHing into the GitHub runner, you must have access to AWS credentials.
-  Access to AWS credentials is limited to [Manifest WG approvers](https://github.com/kubeflow/manifests/blob/master/OWNERS).
-
-## Proof of Concept Workflow
-
-The POC code
-is [available](https://github.com/DomFleischmann/manifests/blob/aj-dev/.github/workflows/aws_e2e_tests.yaml)
-with examples of both [successful](https://github.com/DomFleischmann/manifests/actions/runs/4118561167/jobs/7111228604)
-and [failed](https://github.com/DomFleischmann/manifests/actions/runs/4119052861) runs.
-
-The proposed end-to-end workflow has been tested with the following Kubernetes and Kubeflow versions:
-
-- 1.22 Kubernetes and [1.6.1 Kubeflow release](https://github.com/kubeflow/manifests/releases/tag/v1.6.1) (microk8s)
-- 1.24 Kubernetes and main branch of the manifests
-  repo ([last commit](https://github.com/DomFleischmann/manifests/commit/8e5714171f1fd5b00f59f436e9ab8cb45a0f30e3)) (microk8s)
-- 1.25 Kubernetes and main branch of the manifests
-  repo ([last commit](https://github.com/DomFleischmann/manifests/commit/8e5714171f1fd5b00f59f436e9ab8cb45a0f30e3)) (kind)
-
-### Alternative solutions considered
-
-#### Prow
-
-Some tests already existed for Prow, but they were discarded because they had not been updated in two years and were
-highly complex. After some investigation, the Manifests Working Group decided that adapting those tests to the current
-state of the manifests would be more work than starting from scratch with lower complexity.
-
-#### Self-hosted runners
-
-Self-hosted runners are not recommended for public repositories due to security concerns with how they behave on pull
-requests made from forked repositories.
-
-#### MicroK8s
-
-Instead of KinD, [microk8s](https://microk8s.io/) was considered as an alternative way to install Kubernetes.
-
-The steps below show what is required in the workflow to install microk8s and to install Kubernetes with it. During
-the Kubernetes installation, you must enable [dns](https://microk8s.io/docs/addon-dns),
-[storage](https://microk8s.io/docs/addon-hostpath-storage), [ingress](https://microk8s.io/docs/addon-ingress),
-[loadbalancer](https://microk8s.io/docs/addon-metallb), and [rbac](https://microk8s.io/docs/multi-user).
-
-```shell
-# Install microk8s
-sudo snap install microk8s --classic --channel ${{ matrix.microk8s }}
-sudo apt update
-sudo usermod -a -G microk8s ubuntu
-
-# Install dependencies - kubectl
-sudo snap alias microk8s.kubectl kubectl
-
-# Deploy kubernetes using microk8s
-sudo snap install microk8s --classic --channel 1.24/stable
-microk8s enable dns hostpath-storage ingress metallb:10.64.140.43-10.64.140.49 rbac
-```
-
-**Note**: microk8s requires an IP address pool when enabling the metallb (loadbalancer) addon; the pool
-10.64.140.43-10.64.140.49 is an arbitrary choice.
\ No newline at end of file
diff --git a/deployment/kubeflow/manifests/proposals/README.md b/deployment/kubeflow/manifests/proposals/README.md
deleted file mode 100644
index 32581a4..0000000
--- a/deployment/kubeflow/manifests/proposals/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-## Proposals
-
-If you wish to add a larger feature or make a major refactor to Manifests, we encourage you to write up a proposal
-document. Though our process is not formal, the convention is to create a PR against Manifests with your proposal as
-markdown in the proposals folder. Proposal reviews and feedback will happen on the PR with the proposal.
-
-```
- manifests/proposals  <- folder
-   YYYYMMDD-title.md  <- file
-```
-
-In your proposal it is good to consider and include some of the following:
-* Goals
-* Non-Goals
-* Does this break any existing functionality?
-* Does this fix/solve any outstanding issues?
diff --git a/deployment/kubeflow/manifests/tests/README.md b/deployment/kubeflow/manifests/tests/README.md
deleted file mode 100644
index e4acd66..0000000
--- a/deployment/kubeflow/manifests/tests/README.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Kustomize Manifest Tests
-
-## E2E
-
-There are 2 goals for the e2e tests:
-1. Verify that core components can be applied and become Ready
-2. Verify that core CRDs can be created and succeed
-
-### Components
-
-The e2e tests install everything by using the [single-install
-command](../README.md#install-with-a-single-command). This means that all of
-the core and common components (Istio, Knative, Cert Manager) are installed and
-included in the tests.
-
-### Test Suite
-
-**To use KFServing v0.5/0.6 instead of KServe v0.7, comment out lines 6-7 and uncomment lines 10-11 here: [e2e/utils/kserve.py](https://github.com/kubeflow/manifests/compare/master/tests/e2e/utils/kserve.py#L6-L11)**
-
-The e2e tests are completely independent of the underlying K8s cluster, as well
-as the platform of the cluster. These tests should be able to run in real-world
-clusters, as well as in ephemeral ones like KinD.
-
-The tests also explicitly bypass any authentication system of the
-installation. The goal of these tests is to ensure that a common use case that
-deploys all of Kubeflow's components can succeed. Thus we only want to test
-that the core CRDs can be successfully applied and complete.
diff --git a/deployment/kubeflow/manifests/tests/e2e/.gitignore b/deployment/kubeflow/manifests/tests/e2e/.gitignore
deleted file mode 100644
index 77b431a..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-**/pids.env
diff --git a/deployment/kubeflow/manifests/tests/e2e/README.md b/deployment/kubeflow/manifests/tests/e2e/README.md
deleted file mode 100644
index c009a3b..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/README.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# E2E Mnist
-
-We've converted the community's [E2E Notebook](https://github.com/kubeflow/pipelines/blob/master/samples/contrib/kubeflow-e2e-mnist/kubeflow-e2e-mnist.ipynb) into a Python script. This test can be used to ensure that the core Kubeflow CRDs can be applied and complete.
-
-This test uses the following Kubeflow CRDs:
-1. Kubeflow Pipelines
-2. Katib Experiments
-3. TFJobs
-4. KServe InferenceServices
-
-## How to run
-
-The heart of this test is the `mnist.py` Python script, which applies the CRDs
-and waits for them to complete. The Python scripts all expect that:
-1. `kubectl` is configured with access to a Kubeflow cluster
-2. `kustomize` 3.2.0 is available
-3. the KFP backend is proxied to localhost
-
-While `mnist.py` runs the actual test, it is advised to use the
-`runner.sh` script instead. The `runner.sh` script runs the Python
-script, but also ensures the KFP backend is port-forwarded and cleans up
-afterwards.
-
-## Failures
-
-Both the Python and the bash scripts are designed to fail early: if any
-intermediate command fails, the whole test fails.
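-
-For reference, a typical local invocation of this suite would have looked roughly like the sketch below; the
-`pip` step is an assumption based on the suite's `requirements.txt`:
-
-```shell
-# Assumes kubectl already points at a Kubeflow cluster and kustomize 3.2.0 is installed.
-pip install -r requirements.txt   # pinned kfp and kubeflow-katib SDKs
-./runner.sh                       # applies RBAC, port-forwards Istio/KFP, runs mnist.py, cleans up
-```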
diff --git a/deployment/kubeflow/manifests/tests/e2e/hack/cleanup_proxies.sh b/deployment/kubeflow/manifests/tests/e2e/hack/cleanup_proxies.sh
deleted file mode 100755
index 139113c..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/hack/cleanup_proxies.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env bash
-source pids.env
-
-echo "Killing background jobs..."
-
-kill -KILL $ISTIO_PID
-echo "Killed istio port-forward."
-
-kill -KILL $PIPELINES_PID
-echo "Killed pipelines port-forward."
diff --git a/deployment/kubeflow/manifests/tests/e2e/hack/cleanup_yamls.sh b/deployment/kubeflow/manifests/tests/e2e/hack/cleanup_yamls.sh
deleted file mode 100755
index 3701fe2..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/hack/cleanup_yamls.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-source pids.env
-
-echo "Killing background jobs..."
-
-kill -KILL $ISTIO_PID
-echo "Killed istio port-forward."
-
-kill -KILL $PIPELINES_PID
-echo "Killed pipelines port-forward."
-
-kubectl delete experiments.kubeflow.org -n kubeflow-user-example-com mnist-e2e
-
-kubectl delete tfjobs.kubeflow.org -n kubeflow-user-example-com mnist-e2e
-
-kubectl delete inferenceservices.serving.kserve.io -n kubeflow-user-example-com mnist-e2e
diff --git a/deployment/kubeflow/manifests/tests/e2e/hack/proxy_istio.sh b/deployment/kubeflow/manifests/tests/e2e/hack/proxy_istio.sh
deleted file mode 100755
index af13176..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/hack/proxy_istio.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-# stop all port-forward processes
-trap ctrl_c INT
-
-function ctrl_c() {
-    echo "Stopping port-forward processes..."
-    echo "Killing process $ISTIO_PID..."
-    kill -KILL $ISTIO_PID
-}
-
-kubectl port-forward -n istio-system svc/istio-ingressgateway 8080:80 &
-ISTIO_PID=$!
-echo "Started Istio port-forward, pid: $ISTIO_PID"
-echo ISTIO_PID=$ISTIO_PID >> pids.env
-
-sleep 1
diff --git a/deployment/kubeflow/manifests/tests/e2e/hack/proxy_pipelines.sh b/deployment/kubeflow/manifests/tests/e2e/hack/proxy_pipelines.sh
deleted file mode 100755
index 055eb3d..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/hack/proxy_pipelines.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-kubectl port-forward -n kubeflow svc/ml-pipeline-ui 3000:80 &
-PIPELINES_PID=$!
-
-echo "Started Pipelines port-forward, pid: $PIPELINES_PID"
-echo PIPELINES_PID=$PIPELINES_PID >> pids.env
-
-sleep 1
diff --git a/deployment/kubeflow/manifests/tests/e2e/mnist.py b/deployment/kubeflow/manifests/tests/e2e/mnist.py
deleted file mode 100644
index e5f4b6f..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/mnist.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""E2E Kubeflow test that tests Pipelines, Katib, TFJobs and KServe.
-
-Requires:
-pip install kfp==1.8.4
-pip install kubeflow-katib==0.12.0
-"""
-import kfp
-import kfp.dsl as dsl
-from kubernetes import config
-
-import settings
-from utils import isvc, katib, kserve, tfjob
-
-config.load_kube_config()
-
-
-@dsl.pipeline(
-    name="End to End Pipeline",
-    description="An end to end mnist example including hyperparameter tuning, "
-                "train and inference",
-)
-def mnist_pipeline(name=settings.PIPELINE_NAME,
-                   namespace=settings.NAMESPACE,
-                   training_steps=settings.TRAINING_STEPS):
-    # Run the hyperparameter tuning with Katib.
-    katib_op = katib.create_katib_experiment_task(
-        name, namespace, training_steps)
-
-    # Create volume to train and serve the model.
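-    # (This PVC is shared across the pipeline steps: the TFJob chief mounts it
-    # at /mnt/export to write the trained model, and the KServe
-    # InferenceService later serves from it via a pvc:// storageUri.)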
-    model_volume_op = dsl.VolumeOp(
-        name="model-volume",
-        resource_name="model-volume",
-        size="1Gi",
-        modes=dsl.VOLUME_MODE_RWO,
-    )
-
-    # Run the distributed training with TFJob.
-    tfjob_op = tfjob.create_tfjob_task(name, namespace, training_steps,
-                                       katib_op, model_volume_op)
-
-    # Create the KServe inference task.
-    kserve.create_serving_task(name, namespace, tfjob_op,
-                               model_volume_op)
-
-
-if __name__ == "__main__":
-    # Run the Kubeflow Pipeline in the user's namespace.
-    kfp_client = kfp.Client(host="http://localhost:3000",
-                            namespace="kubeflow-user-example-com")
-    kfp_client.runs.api_client.default_headers.update(
-        {"kubeflow-userid": "kubeflow-user-example-com"})
-
-    # create the KFP run
-    run_id = kfp_client.create_run_from_pipeline_func(
-        mnist_pipeline,
-        namespace=settings.NAMESPACE,
-        arguments={},
-    ).run_id
-    print("Run ID: ", run_id)
-
-    katib.wait_to_create(name=settings.EXPERIMENT_NAME,
-                         namespace=settings.NAMESPACE,
-                         timeout=settings.TIMEOUT)
-
-    tfjob.wait_to_create(name=settings.EXPERIMENT_NAME,
-                         namespace=settings.NAMESPACE,
-                         timeout=settings.TIMEOUT)
-
-    tfjob.wait_to_succeed(name=settings.TFJOB_NAME,
-                          namespace=settings.NAMESPACE,
-                          timeout=settings.TIMEOUT)
-
-    katib.wait_to_succeed(name=settings.EXPERIMENT_NAME,
-                          namespace=settings.NAMESPACE,
-                          timeout=settings.TIMEOUT)
-
-    isvc.wait_to_create(settings.ISVC_NAME,
-                        namespace=settings.NAMESPACE,
-                        timeout=settings.TIMEOUT)
-
-    isvc.wait_to_succeed(settings.ISVC_NAME,
-                         namespace=settings.NAMESPACE,
-                         timeout=settings.TIMEOUT)
diff --git a/deployment/kubeflow/manifests/tests/e2e/requirements.txt b/deployment/kubeflow/manifests/tests/e2e/requirements.txt
deleted file mode 100644
index 1608509..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-kfp==1.8.22
-kubeflow-katib==0.15.0
\ No newline at end of file
diff --git a/deployment/kubeflow/manifests/tests/e2e/runner.sh b/deployment/kubeflow/manifests/tests/e2e/runner.sh
deleted file mode 100755
index 440f304..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/runner.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-echo "Installing necessary RBAC."
-kubectl apply -f yamls
-
-echo "Setting up port-forward..."
-./hack/proxy_istio.sh
-./hack/proxy_pipelines.sh
-
-echo "Running the tests."
-python3 mnist.py
-
-echo "Cleaning up opened processes."
-./hack/cleanup_proxies.sh
-
-echo "Leaving the cluster as is for further inspection."
diff --git a/deployment/kubeflow/manifests/tests/e2e/settings.py b/deployment/kubeflow/manifests/tests/e2e/settings.py
deleted file mode 100644
index 2a24346..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/settings.py
+++ /dev/null
@@ -1,9 +0,0 @@
-NAMESPACE = "kubeflow-user-example-com"
-TIMEOUT = 600
-
-PIPELINE_NAME = "mnist-e2e"
-EXPERIMENT_NAME = "mnist-e2e"
-TFJOB_NAME = "mnist-e2e"
-ISVC_NAME = "mnist-e2e"
-
-TRAINING_STEPS = "1"
diff --git a/deployment/kubeflow/manifests/tests/e2e/utils/isvc.py b/deployment/kubeflow/manifests/tests/e2e/utils/isvc.py
deleted file mode 100644
index 255e69f..0000000
--- a/deployment/kubeflow/manifests/tests/e2e/utils/isvc.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from .
import watch - -GROUP = "serving.kserve.io" #"serving.kubeflow.org" -PLURAL = "inferenceservices" -VERSION = "v1beta1" - - -# wait_for_ready(name, namespace, timeout): -def wait_to_create(name, namespace, timeout): - """Wait until the specified InferenceService gets created.""" - return watch.wait_created_cr(name, namespace, - timeout=timeout, group=GROUP, plural=PLURAL, - version=VERSION) - - -def wait_to_succeed(name, namespace, timeout): - """Wait until the specified InferenceService succeeds.""" - return watch.wait_to_succeed(name=name, namespace=namespace, - timeout=timeout, group=GROUP, plural=PLURAL, - version=VERSION) diff --git a/deployment/kubeflow/manifests/tests/e2e/utils/katib.py b/deployment/kubeflow/manifests/tests/e2e/utils/katib.py deleted file mode 100644 index 4491706..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/utils/katib.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Katib helper functions to create a Pipeline task.""" -from kfp import components -from kubeflow.katib import (ApiClient, V1beta1AlgorithmSpec, - V1beta1ExperimentSpec, V1beta1FeasibleSpace, - V1beta1ObjectiveSpec, V1beta1ParameterSpec, - V1beta1TrialParameterSpec, V1beta1TrialTemplate) - -from . import watch - -TFJOB_IMAGE = "docker.io/liuhougangxa/tf-estimator-mnist" -KATIB_LAUNCHER_URL = "https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-rc.1/components/kubeflow/katib-launcher/component.yaml" - -GROUP = "kubeflow.org" -PLURAL = "experiments" -VERSION = "v1beta1" - - -def wait_to_create(name, namespace, timeout): - """Wait until the specified Katib Experiment gets created.""" - return watch.wait_created_cr(name, namespace, - timeout=timeout, group=GROUP, plural=PLURAL, - version=VERSION) - - -def wait_to_succeed(name, namespace, timeout): - """Wait until the specified Katib Experiment succeeds.""" - return watch.wait_to_succeed(name=name, namespace=namespace, - timeout=timeout, group=GROUP, plural=PLURAL, - version=VERSION) - - -# This function converts Katib Experiment HP results to args. -def convert_katib_results(katib_results) -> str: - import json - import pprint - katib_results_json = json.loads(katib_results) - print("Katib results:") - pprint.pprint(katib_results_json) - best_hps = [] - for pa in katib_results_json["currentOptimalTrial"]["parameterAssignments"]: - if pa["name"] == "learning_rate": - best_hps.append("--tf-learning-rate=" + pa["value"]) - elif pa["name"] == "batch_size": - best_hps.append("--tf-batch-size=" + pa["value"]) - print("Best Hyperparameters: {}".format(best_hps)) - return " ".join(best_hps) - - -# You should define the Experiment name, namespace and number of training steps -# in the arguments. -def create_katib_experiment_task(experiment_name, experiment_namespace, - training_steps): - # Trial count specification. - max_trial_count = 5 - max_failed_trial_count = 3 - parallel_trial_count = 2 - - # Objective specification. - objective = V1beta1ObjectiveSpec( - type="minimize", - goal=0.001, - objective_metric_name="loss", - ) - - # Algorithm specification. - algorithm = V1beta1AlgorithmSpec( - algorithm_name="random", - ) - - # Experiment search space. - # In this example we tune learning rate and batch size. - parameters = [ - V1beta1ParameterSpec( - name="learning_rate", - parameter_type="double", - feasible_space=V1beta1FeasibleSpace( - min="0.01", - max="0.05", - ), - ), - V1beta1ParameterSpec( - name="batch_size", - parameter_type="int", - feasible_space=V1beta1FeasibleSpace( - min="80", - max="100", - ), - ), - ] - - # Experiment Trial template. 
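-    # (The ${trialParameters.*} placeholders in the spec below are filled in
-    # by Katib from the trial_parameters mapping defined further down.)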
- # TODO (andreyvelich): Use community image for the mnist example. - trial_spec = { - "apiVersion": "kubeflow.org/v1", - "kind": "TFJob", - "spec": { - "tfReplicaSpecs": { - "Chief": { - "replicas": 1, - "restartPolicy": "OnFailure", - "template": { - "metadata": { - "annotations": { - "sidecar.istio.io/inject": "false", - }, - }, - "spec": { - "containers": [ - { - "name": "tensorflow", - "image": TFJOB_IMAGE, - "command": [ - "python", - "/opt/model.py", - "--tf-train-steps=" - + str(training_steps), - "--tf-learning-rate=${trialParameters.learningRate}", - "--tf-batch-size=${trialParameters.batchSize}" - ] - } - ] - } - } - }, - "Worker": { - "replicas": 1, - "restartPolicy": "OnFailure", - "template": { - "metadata": { - "annotations": { - "sidecar.istio.io/inject": "false", - }, - }, - "spec": { - "containers": [ - { - "name": "tensorflow", - "image": "docker.io/liuhougangxa/tf-estimator-mnist", - "command": [ - "python", - "/opt/model.py", - "--tf-train-steps=" - + str(training_steps), - "--tf-learning-rate=${trialParameters.learningRate}", - "--tf-batch-size=${trialParameters.batchSize}" - ] - } - ] - } - } - } - } - } - } - - # Configure parameters for the Trial template. - trial_template = V1beta1TrialTemplate( - primary_container_name="tensorflow", - primary_pod_labels={"training.kubeflow.org/job-role": "master"}, - trial_parameters=[ - V1beta1TrialParameterSpec( - name="learningRate", - description="Learning rate for the training model", - reference="learning_rate", - ), - V1beta1TrialParameterSpec( - name="batchSize", - description="Batch size for the model", - reference="batch_size", - ), - ], - trial_spec=trial_spec, - ) - - # Create an Experiment from the above parameters. - experiment_spec = V1beta1ExperimentSpec( - max_trial_count=max_trial_count, - max_failed_trial_count=max_failed_trial_count, - parallel_trial_count=parallel_trial_count, - objective=objective, - algorithm=algorithm, - parameters=parameters, - trial_template=trial_template, - ) - - # Create the KFP task for the Katib Experiment. - # Experiment Spec should be serialized to a valid Kubernetes object. 
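-    # (sanitize_for_serialization() converts the SDK model objects above into
-    # plain JSON-serialisable dicts, which is the form passed to the launcher
-    # component's experiment_spec input below.)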
- katib_experiment_launcher_op = components.load_component_from_url( - KATIB_LAUNCHER_URL, - ) - - op = katib_experiment_launcher_op( - experiment_name=experiment_name, - experiment_namespace=experiment_namespace, - experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec), - experiment_timeout_minutes=60, - delete_finished_experiment=False) - - return op diff --git a/deployment/kubeflow/manifests/tests/e2e/utils/kserve.py b/deployment/kubeflow/manifests/tests/e2e/utils/kserve.py deleted file mode 100644 index d62ca6b..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/utils/kserve.py +++ /dev/null @@ -1,28 +0,0 @@ -from kfp import components - - -def create_serving_task(model_name, model_namespace, tfjob_op, model_volume_op): - - api_version = 'serving.kserve.io/v1beta1' - serving_component_url = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kserve/component.yaml' - - # Uncomment the following two lines if you are using KFServing v0.6.x or v0.5.x - # api_version = 'serving.kubeflow.org/v1beta1' - # serving_component_url = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/kfserving/component.yaml' - - inference_service = ''' -apiVersion: "{}" -kind: "InferenceService" -metadata: - name: {} - namespace: {} - annotations: - "sidecar.istio.io/inject": "false" -spec: - predictor: - tensorflow: - storageUri: "pvc://{}/" -'''.format(api_version, model_name, model_namespace, str(model_volume_op.outputs["name"])) - - serving_launcher_op = components.load_component_from_url(serving_component_url) - serving_launcher_op(action="apply", inferenceservice_yaml=inference_service).after(tfjob_op) \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/e2e/utils/tfjob.py b/deployment/kubeflow/manifests/tests/e2e/utils/tfjob.py deleted file mode 100644 index 0f3d352..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/utils/tfjob.py +++ /dev/null @@ -1,127 +0,0 @@ -from kfp import components - -from . import katib, watch - -TFJOB_URL = "https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/launcher/component.yaml" - -GROUP = "kubeflow.org" -PLURAL = "tfjobs" -VERSION = "v1" - - -def wait_to_create(name, namespace, timeout): - """Wait until the specified TFJob gets created.""" - return watch.wait_created_cr(name, namespace, - timeout=timeout, group=GROUP, plural=PLURAL, - version=VERSION) - - -def wait_to_succeed(name, namespace, timeout): - """Wait until the specified TFJob succeeds.""" - return watch.wait_to_succeed(name=name, namespace=namespace, - timeout=timeout, group=GROUP, plural=PLURAL, - version=VERSION) - - -# You should define the TFJob name, namespace, number of training steps, output -# of Katib and model volume tasks in the arguments. -def create_tfjob_task(tfjob_name, tfjob_namespace, training_steps, katib_op, - model_volume_op): - import json - - # Get parameters from the Katib Experiment. - # Parameters are in the format - # "--tf-learning-rate=0.01 --tf-batch-size=100" - convert_katib_results_op = components.func_to_container_op( - katib.convert_katib_results, - ) - best_hp_op = convert_katib_results_op(katib_op.output) - best_hps = str(best_hp_op.output) - - # Create the TFJob Chief and Worker specification with the best - # Hyperparameters. - # TODO (andreyvelich): Use community image for the mnist example. 
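-    # (Note: only the Chief spec below mounts the model PVC; it exports the
-    # trained model to /mnt/export, which the KServe step later serves from.
-    # The Worker runs the same training command without the volume.)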
- tfjob_chief_spec = { - "replicas": 1, - "restartPolicy": "OnFailure", - "template": { - "metadata": { - "annotations": { - "sidecar.istio.io/inject": "false", - }, - }, - "spec": { - "containers": [ - { - "name": "tensorflow", - "image": "docker.io/liuhougangxa/tf-estimator-mnist", - "command": [ - "sh", - "-c", - ], - "args": [ - "python /opt/model.py " - "--tf-export-dir=/mnt/export " - "--tf-train-steps={} {}".format(training_steps, - best_hps), - ], - "volumeMounts": [ - { - "mountPath": "/mnt/export", - "name": "model-volume", - }, - ], - }, - ], - "volumes": [ - { - "name": "model-volume", - "persistentVolumeClaim": { - "claimName": str(model_volume_op.outputs["name"]), - }, - }, - ], - }, - }, - } - - tfjob_worker_spec = { - "replicas": 1, - "restartPolicy": "OnFailure", - "template": { - "metadata": { - "annotations": { - "sidecar.istio.io/inject": "false", - }, - }, - "spec": { - "containers": [ - { - "name": "tensorflow", - "image": "docker.io/liuhougangxa/tf-estimator-mnist", - "command": [ - "sh", - "-c", - ], - "args": [ - "python /opt/model.py " - "--tf-export-dir=/mnt/export " - "--tf-train-steps={} {}".format(training_steps, - best_hps), - ], - }, - ], - }, - }, - } - - # Create the KFP task for the TFJob. - tfjob_launcher_op = components.load_component_from_url(TFJOB_URL) - op = tfjob_launcher_op( - name=tfjob_name, - namespace=tfjob_namespace, - chief_spec=json.dumps(tfjob_chief_spec), - worker_spec=json.dumps(tfjob_worker_spec), - tfjob_timeout_minutes=60, - delete_finished_tfjob=False) - return op diff --git a/deployment/kubeflow/manifests/tests/e2e/utils/watch.py b/deployment/kubeflow/manifests/tests/e2e/utils/watch.py deleted file mode 100644 index 902c53a..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/utils/watch.py +++ /dev/null @@ -1,76 +0,0 @@ -from kubernetes import client, watch - - -def wait_created_cr(name, namespace, group, version, plural, timeout): - """Wait until the specified CR gets created.""" - w = watch.Watch() - custom_api = client.CustomObjectsApi() - - fn = custom_api.list_namespaced_custom_object - - print("Waiting for %s %s.%s to get created..." % (name, plural, group)) - for event in w.stream(func=fn, namespace=namespace, group=group, - version=version, plural=plural, - timeout_seconds=timeout): - if event["type"] != "ADDED": - continue - - cr = event["object"] - if cr["metadata"]["name"] != name: - continue - - # the requested CR got created - print("%s %s.%s got created." % (name, plural, group)) - w.stop() - return True - - raise RuntimeError("Timeout reached waiting for CR %s %s.%s" % - (name, plural, group)) - - -def succeeded(job): - """Check if the CR has either a Ready or Succeeded condition""" - if "status" not in job: - return False - - if "conditions" not in job["status"]: - return False - - for condition in job["status"]["conditions"]: - if "Succeeded" in condition["type"]: - return condition["status"] == "True" - - if "Ready" in condition["type"]: - return condition["status"] == "True" - - return False - - -def wait_to_succeed(name, namespace, group, version, plural, timeout): - """Wait until the specified TFJob succeeds.""" - w = watch.Watch() - custom_api = client.CustomObjectsApi() - - cr = {} - fn = custom_api.list_namespaced_custom_object - - print("Waiting for %s %s.%s to succeed..." 
% (name, plural, group)) - for event in w.stream(func=fn, namespace=namespace, group=group, - version=version, plural=plural, - timeout_seconds=timeout): - - cr = event["object"] - if cr["metadata"]["name"] != name: - continue - - if event["type"] == "DELETED": - raise RuntimeError("%s %s.%s was deleted." % - (name, plural, group)) - - if succeeded(cr): - w.stop() - print("%s %s.%s succeeded." % (name, plural, group)) - return - - raise RuntimeError("Timeout reached waiting for %s %s.%s to succeed: %s" % - (name, plural, version, cr)) diff --git a/deployment/kubeflow/manifests/tests/e2e/yamls/role.yaml b/deployment/kubeflow/manifests/tests/e2e/yamls/role.yaml deleted file mode 100644 index ce0f527..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/yamls/role.yaml +++ /dev/null @@ -1,78 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - annotations: - name: pipeline-runner - namespace: kubeflow-user-example-com -rules: -- apiGroups: - - "" - resources: - - persistentvolumes - - persistentvolumeclaims - verbs: - - '*' -- apiGroups: - - argoproj.io - resources: - - workflows - verbs: - - get - - list - - watch - - update - - patch -- apiGroups: - - "" - resources: - - pods - - pods/exec - - pods/log - - services - verbs: - - '*' -- apiGroups: - - "" - - apps - - extensions - resources: - - deployments - - replicasets - verbs: - - '*' -- apiGroups: - - experiments.kubeflow.org - resources: - - '*' - verbs: - - '*' -- apiGroups: - - pipelines.kubeflow.org - resources: - - '*' - verbs: - - '*' -- apiGroups: - - kubeflow.org - resources: - - '*' - verbs: - - '*' -- apiGroups: - - batch - resources: - - jobs - verbs: - - '*' -- apiGroups: - - machinelearning.seldon.io - resources: - - seldondeployments - verbs: - - '*' -- apiGroups: - - serving.kubeflow.org - resources: - - '*' - verbs: - - '*' diff --git a/deployment/kubeflow/manifests/tests/e2e/yamls/sa-role-binding.yaml b/deployment/kubeflow/manifests/tests/e2e/yamls/sa-role-binding.yaml deleted file mode 100644 index 648f13e..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/yamls/sa-role-binding.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: sa-pipeline-runner - namespace: kubeflow-user-example-com -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: pipeline-runner -subjects: -- kind: ServiceAccount - name: pipeline-runner - namespace: kubeflow-user-example-com diff --git a/deployment/kubeflow/manifests/tests/e2e/yamls/service-account.yaml b/deployment/kubeflow/manifests/tests/e2e/yamls/service-account.yaml deleted file mode 100644 index 47af572..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/yamls/service-account.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: pipeline-runner - namespace: kubeflow-user-example-com diff --git a/deployment/kubeflow/manifests/tests/e2e/yamls/user-role-binding.yaml b/deployment/kubeflow/manifests/tests/e2e/yamls/user-role-binding.yaml deleted file mode 100644 index 81633fc..0000000 --- a/deployment/kubeflow/manifests/tests/e2e/yamls/user-role-binding.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: user-pipeline-runner - namespace: kubeflow-user-example-com -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: pipeline-runner -subjects: -- kind: User - name: kubeflow-user-example-com - namespace: kubeflow-user-example-com diff --git 
a/deployment/kubeflow/manifests/tests/gh-actions/install_argo_cli.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_argo_cli.sh deleted file mode 100755 index 8b4f696..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_argo_cli.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e -kubectl create namespace argo -# Download the binary -curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.1.3/argo-linux-amd64.gz - -# Unzip -gunzip argo-linux-amd64.gz - -# Make binary executable -chmod +x argo-linux-amd64 - -# Move binary to path -mv ./argo-linux-amd64 /usr/local/bin/argo \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_cert_manager.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_cert_manager.sh deleted file mode 100755 index dcb855a..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_cert_manager.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -e -echo "Installing cert-manager ..." -cd common/cert-manager -kubectl create namespace cert-manager -kustomize build cert-manager/base | kubectl apply -f - -echo "Waiting for cert-manager to be ready ..." -kubectl wait --for=condition=ready pod -l 'app in (cert-manager,webhook)' --timeout=180s -n cert-manager -kustomize build kubeflow-issuer/base | kubectl apply -f - \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_istio-cni.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_istio-cni.sh deleted file mode 100755 index 05b2d0a..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_istio-cni.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -echo "Installing Istio-cni ..." -cd common/istio-cni-1-17 -kustomize build istio-crds/base | kubectl apply -f - -kustomize build istio-namespace/base | kubectl apply -f - -kustomize build istio-install/base | kubectl apply -f - \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_istio.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_istio.sh deleted file mode 100755 index eb01319..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_istio.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -e -echo "Installing Istio ..." -cd common/istio-1-17 -kustomize build istio-crds/base | kubectl apply -f - -kustomize build istio-namespace/base | kubectl apply -f - -kustomize build istio-install/base | kubectl apply -f - - -echo "Waiting for all Istio Pods to become ready..." -kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_kind.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_kind.sh deleted file mode 100755 index ce8811a..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_kind.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -e -echo "Fetching KinD executable ..." 
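-# Kubernetes requires swap to be off; backing etcd with tmpfs is a common
-# trick to speed up and stabilise KinD clusters on CI runners.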
-sudo swapoff -a -sudo rm -f /swapfile -sudo mkdir -p /tmp/etcd -sudo mount -t tmpfs tmpfs /tmp/etcd -curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64 -chmod +x ./kind -sudo mv kind /usr/local/bin \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_knative-cni.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_knative-cni.sh deleted file mode 100755 index 4381f0a..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_knative-cni.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -euo pipefail -echo "Installing KNative with istio-cni ..." -set +e -kustomize build common/knative/knative-serving/base | kubectl apply -f - -set -e -kustomize build common/knative/knative-serving/base | kubectl apply -f - - -kustomize build common/istio-cni-1-17/cluster-local-gateway/base | kubectl apply -f - -kustomize build common/istio-cni-1-17/kubeflow-istio-resources/base | kubectl apply -f - - -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_knative.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_knative.sh deleted file mode 100755 index cf409a6..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_knative.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -euo pipefail -echo "Installing KNative ..." -set +e -kustomize build common/knative/knative-serving/base | kubectl apply -f - -set -e -kustomize build common/knative/knative-serving/base | kubectl apply -f - - -kustomize build common/istio-1-17/cluster-local-gateway/base | kubectl apply -f - -kustomize build common/istio-1-17/kubeflow-istio-resources/base | kubectl apply -f - - -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s -kubectl patch cm config-domain --patch '{"data":{"example.com":""}}' -n knative-serving diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_kserve.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_kserve.sh deleted file mode 100755 index 80cad4e..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_kserve.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -euo pipefail -echo "Installing Kserve ..." -cd contrib/kserve -set +e -kustomize build kserve | kubectl apply -f - -set -e -echo "Waiting for crd/clusterservingruntimes.serving.kserve.io to be available ..." 
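-# (The first apply above may partially fail while the KServe webhook starts;
-# once the CRD below is established, the kustomize build is applied a second time.)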
-kubectl wait --for condition=established --timeout=30s crd/clusterservingruntimes.serving.kserve.io -kustomize build kserve | kubectl apply -f - -kustomize build models-web-app/overlays/kubeflow | kubectl apply -f - -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_kustomize.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_kustomize.sh deleted file mode 100755 index 16829f7..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_kustomize.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -curl --silent --location --remote-name "https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv5.0.3/kustomize_v5.0.3_linux_amd64.tar.gz" -tar -xzvf kustomize_v5.0.3_linux_amd64.tar.gz -chmod a+x kustomize -sudo mv kustomize /usr/local/bin/kustomize diff --git a/deployment/kubeflow/manifests/tests/gh-actions/install_pipelines.sh b/deployment/kubeflow/manifests/tests/gh-actions/install_pipelines.sh deleted file mode 100755 index d21dd73..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/install_pipelines.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -euo pipefail -echo "Installing Pipelines ..." -cd apps/pipeline/upstream -kubectl create ns kubeflow -kubectl apply -f third-party/metacontroller/base/crd.yaml -echo "Waiting for crd/compositecontrollers.metacontroller.k8s.io to be available ..." -kubectl wait --for condition=established --timeout=30s crd/compositecontrollers.metacontroller.k8s.io -kustomize build env/cert-manager/platform-agnostic-multi-user | kubectl apply -f - -sleep 60 -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 600s diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/katib_test.yaml b/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/katib_test.yaml deleted file mode 100644 index 5d0c4a9..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/katib_test.yaml +++ /dev/null @@ -1,78 +0,0 @@ ---- -apiVersion: kubeflow.org/v1beta1 -kind: Experiment -metadata: - name: grid-example - namespace: kubeflow-user -spec: - parameters: - - name: lr - parameterType: double - feasibleSpace: - max: '0.01' - min: '0.001' - step: '0.001' - - name: num-layers - parameterType: int - feasibleSpace: - max: '3' - min: '2' - - name: optimizer - parameterType: categorical - feasibleSpace: - list: - - adam - objective: - type: maximize - goal: 0.80 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy - metricStrategies: - - name: Validation-accuracy - value: max - - name: Train-accuracy - value: max - algorithm: - algorithmName: grid - trialTemplate: - trialSpec: - apiVersion: batch/v1 - kind: Job - spec: - template: - metadata: - labels: - sidecar.istio.io/inject: 'false' - spec: - containers: - - command: - - python3 - - /opt/mxnet-mnist/mnist.py - - '--batch-size=64' - - '--lr=${trialParameters.learningRate}' - - '--num-layers=${trialParameters.numberLayers}' - - '--optimizer=${trialParameters.optimizer}' - image: docker.io/kubeflowkatib/mxnet-mnist:latest - name: training-container - restartPolicy: Never - trialParameters: - - name: learningRate - description: Learning rate for the training model - reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer - primaryContainerName: 
training-container - successCondition: status.conditions.#(type=="Complete")#|#(status=="True")# - failureCondition: status.conditions.#(type=="Failed")#|#(status=="True")# - parallelTrialCount: 1 - maxTrialCount: 1 - maxFailedTrialCount: 1 - metricsCollectorSpec: - collector: - kind: StdOut - resumePolicy: LongRunning \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/kserve_test.yaml b/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/kserve_test.yaml deleted file mode 100644 index e7c5129..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/kserve_test.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: "serving.kserve.io/v1beta1" -kind: "InferenceService" -metadata: - name: "sklearn-iris" -spec: - predictor: - sklearn: - resources: - limits: - cpu: "1" - memory: 2Gi - requests: - cpu: "0.1" - memory: 200M - storageUri: "gs://kfserving-examples/models/sklearn/1.0/model" \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/test_pipeline.py b/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/test_pipeline.py deleted file mode 100755 index 9bd8228..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/test_pipeline.py +++ /dev/null @@ -1,28 +0,0 @@ -import kfp -from kfp import dsl -import kfp.components as comp - - -@comp.create_component_from_func -def echo_op(): - print("Test pipeline") - -@dsl.pipeline( - name='test-pipeline', - description='A test pipeline.' -) -def hello_world_pipeline(): - echo_task = echo_op() - -if __name__ == "__main__": - # Run the Kubeflow Pipeline in the user's namespace. - kfp_client = kfp.Client(host="http://localhost:3000", - namespace="kubeflow-user-example-com") - kfp_client.runs.api_client.default_headers.update( - {"kubeflow-userid": "kubeflow-user-example-com"}) - # create the KFP run - run_id = kfp_client.create_run_from_pipeline_func( - hello_world_pipeline, - namespace="kubeflow-user-example-com", - arguments={}, - ).run_id \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/tfjob.yaml b/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/tfjob.yaml deleted file mode 100644 index 313c331..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kf-objects/tfjob.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: "kubeflow.org/v1" -kind: TFJob -metadata: - name: tfjob-simple - namespace: kubeflow -spec: - tfReplicaSpecs: - Worker: - replicas: 2 - restartPolicy: OnFailure - template: - spec: - containers: - - name: tensorflow - image: gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0 - command: - - "python" - - "/var/tf_mnist/mnist_with_summaries.py" - - "--log_dir=/train/logs" - - "--learning_rate=0.01" - - "--batch_size=150" \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-24.yaml b/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-24.yaml deleted file mode 100644 index bd047c1..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-24.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# This testing option is available for testing projects that don't yet support k8s 1.25 -apiVersion: kind.x-k8s.io/v1alpha4 -kind: Cluster -# Configure registry for KinD. 
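-# ($REGISTRY_NAME and $REGISTRY_PORT are placeholders, presumably substituted
-# by the calling CI scripts before this file is passed to `kind create cluster`.)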
-containerdConfigPatches: -- |- - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."$REGISTRY_NAME:$REGISTRY_PORT"] - endpoint = ["http://$REGISTRY_NAME:$REGISTRY_PORT"] -# This is needed in order to support projected volumes with service account tokens. -# See: https://kubernetes.slack.com/archives/CEKK1KTN2/p1600268272383600 -kubeadmConfigPatches: - - | - apiVersion: kubeadm.k8s.io/v1beta2 - kind: ClusterConfiguration - metadata: - name: config - apiServer: - extraArgs: - "service-account-issuer": "kubernetes.default.svc" - "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key" -nodes: -- role: control-plane - image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315 -- role: worker - image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315 -- role: worker - image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315 diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-25.yaml b/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-25.yaml deleted file mode 100644 index 3a1385b..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster-1-25.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: kind.x-k8s.io/v1alpha4 -kind: Cluster -# Configure registry for KinD. -containerdConfigPatches: -- |- - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."$REGISTRY_NAME:$REGISTRY_PORT"] - endpoint = ["http://$REGISTRY_NAME:$REGISTRY_PORT"] -# This is needed in order to support projected volumes with service account tokens. -# See: https://kubernetes.slack.com/archives/CEKK1KTN2/p1600268272383600 -kubeadmConfigPatches: - - | - apiVersion: kubeadm.k8s.io/v1beta2 - kind: ClusterConfiguration - metadata: - name: config - apiServer: - extraArgs: - "service-account-issuer": "kubernetes.default.svc" - "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key" -nodes: -- role: control-plane - image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1 -- role: worker - image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1 -- role: worker - image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1 \ No newline at end of file diff --git a/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster.yaml b/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster.yaml deleted file mode 100644 index 84ea5a2..0000000 --- a/deployment/kubeflow/manifests/tests/gh-actions/kind-cluster.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: kind.x-k8s.io/v1alpha4 -kind: Cluster -# Configure registry for KinD. -containerdConfigPatches: -- |- - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."$REGISTRY_NAME:$REGISTRY_PORT"] - endpoint = ["http://$REGISTRY_NAME:$REGISTRY_PORT"] -# This is needed in order to support projected volumes with service account tokens. 
-# See: https://kubernetes.slack.com/archives/CEKK1KTN2/p1600268272383600
-kubeadmConfigPatches:
-  - |
-    apiVersion: kubeadm.k8s.io/v1beta2
-    kind: ClusterConfiguration
-    metadata:
-      name: config
-    apiServer:
-      extraArgs:
-        "service-account-issuer": "kubernetes.default.svc"
-        "service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
-nodes:
-- role: control-plane
-  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
-- role: worker
-  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
-- role: worker
-  image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
\ No newline at end of file
diff --git a/deployment/kustomization.yaml b/deployment/kustomization.yaml
index 7c1c6cc..c5d654e 100644
--- a/deployment/kustomization.yaml
+++ b/deployment/kustomization.yaml
@@ -2,7 +2,6 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
-- ./kubeflow/manifests/apps/pipeline/upstream/cluster-scoped-resources
 - ./kubeflow/manifests/in-cluster-setup
 - ./kubeflow-custom
 - ./mlflow/env/local
diff --git a/deployment/kubeflow/manifests/tests/e2e/utils/__init__.py b/tests/__init__.py
similarity index 100%
rename from deployment/kubeflow/manifests/tests/e2e/utils/__init__.py
rename to tests/__init__.py
diff --git a/tests/__pycache__/__init__.cpython-38.pyc b/tests/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3f6f9045b6fd872d27bf051a4c9123891cefdb63
GIT binary patch
literal 170
[base85 binary patch data omitted]
literal 0
HcmV?d00001
diff --git a/tests/__pycache__/conftest.cpython-38-pytest-7.1.3.pyc b/tests/__pycache__/conftest.cpython-38-pytest-7.1.3.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9c1c84a7496c0b93ba5018cc56953e2ec0dfffae
GIT binary patch
literal 2019
[base85 binary patch data omitted]
literal 0
HcmV?d00001
[additional tests/__pycache__ binary patch sections omitted]
diff --git a/tests/__pycache__/test_kfp.cpython-38-pytest-7.1.3.pyc b/tests/__pycache__/test_kfp.cpython-38-pytest-7.1.3.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4fd6fa0e74ed4584b72b9b753c53b460cfcbe70e
GIT binary patch
literal 4966
[base85 binary patch data omitted]
literal 0
HcmV?d00001
diff --git a/tests/__pycache__/test_mlflow.cpython-38-pytest-7.1.3.pyc b/tests/__pycache__/test_mlflow.cpython-38-pytest-7.1.3.pyc
new file mode 100644
GIT binary patch
literal 5181
[base85 binary patch data omitted]
literal 0
HcmV?d00001
diff --git a/tests/__pycache__/test_registry.cpython-38-pytest-7.1.3.pyc b/tests/__pycache__/test_registry.cpython-38-pytest-7.1.3.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..32d95bcad3166c0fdb32a44dcf5fe666761ca791
GIT binary patch
literal 2120
[base85 binary patch data omitted]
literal 0
HcmV?d00001
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..1df8804
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,36 @@
+import subprocess
+import pathlib
+from dotenv import load_dotenv
+import os
+
+ENV_FILE = pathlib.Path(__file__).parent.parent / "config.env"
+load_dotenv(dotenv_path=ENV_FILE)
+
+CLUSTER_NAME = os.getenv("CLUSTER_NAME")
+assert CLUSTER_NAME is not None
+CONTEXT_NAME = f"kind-{CLUSTER_NAME}"
+
+HOST_IP = os.getenv("HOST_IP")
+assert HOST_IP is not None
+
+# MLFLOW
+MLFLOW_ENV_FILE = pathlib.Path(__file__).parent.parent / "deployment/mlflow/env/local" / "config.env"
+MLFLOW_SECRETS_FILE = pathlib.Path(__file__).parent.parent / "deployment/mlflow/env/local" / "secret.env"
+
+load_dotenv(dotenv_path=MLFLOW_ENV_FILE, override=True)
+AWS_ACCESS_KEY_ID = os.getenv("MINIO_ACCESS_KEY")
+assert AWS_ACCESS_KEY_ID is not None
+
+load_dotenv(dotenv_path=MLFLOW_SECRETS_FILE, override=True)
+AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
+assert AWS_SECRET_ACCESS_KEY is not None
+
+
+def pytest_sessionstart(session):
+    """
+    Called after the Session object has been created and
+    before performing collection and entering the run test loop.
+ """ + print(f"Set up kubectl context ({CONTEXT_NAME}) before starting the tests.") + + subprocess.run(["kubectl", "config", "use-context", CONTEXT_NAME], stdout=True) diff --git a/tests/requirements-tests.txt b/tests/requirements-tests.txt new file mode 100644 index 0000000..81232d0 --- /dev/null +++ b/tests/requirements-tests.txt @@ -0,0 +1,15 @@ +# recommended using python >= 3.8 + +numpy~=1.23.3 +pandas~=1.5.0 +scikit-learn~=1.1.2 +mlflow~=2.4.1 +minio~=7.1.11 +kfp~=1.8.10 +boto3~=1.24.80 +pytest~=7.1.3 +pytest-order~=1.0.1 +pytest-timeout==2.1.0 +jq~=1.3.0 +python-dotenv~=0.21.0 +envsubst~=0.1.5 diff --git a/tests/resources/access_management/create_user/create_user.sh b/tests/resources/access_management/create_user/create_user.sh new file mode 100755 index 0000000..78252fd --- /dev/null +++ b/tests/resources/access_management/create_user/create_user.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# usage: $./create_user.sh +# e.g. $./create_user.sh john users + +set -eoau pipefail + + +function print_help { + echo "" + echo "options:" + echo " -h Print this help message and exit" + echo " -u=USER_NAME " + echo " -g=GROUP_NAME " + echo " -o=OUTPUT_DIR " + +} + +while getopts hu:g:o: flag +do + case "${flag}" in + h) + print_help + exit 0 + ;; + u) USER_NAME=${OPTARG};; + g) GROUP_NAME=${OPTARG};; + o) OUTDIR=${OPTARG};; + *) + echo 'Error in command line parsing' >&2 + print_help + exit 1 + esac +done + +if [ -u "$USER_NAME" ]; then + echo 'Missing -u / USER_NAME' >&2 + exit 1 +fi +if [ -g "$GROUP_NAME" ]; then + echo 'Missing -g / GROUP_NAME' >&2 + exit 1 +fi +if [ -o "$OUTDIR" ]; then + echo 'Missing -o / OUTDIR' >&2 + exit 1 +fi + +echo "USER_NAME: $USER_NAME"; +echo "GROUP_NAME: $GROUP_NAME"; +echo "OUTDIR: $OUTDIR"; + +TEMPLATES_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +# create CSR config for the user +cat "$TEMPLATES_DIR"/csr.cnf.template | envsubst > "$OUTDIR"/"$USER_NAME"-csr.cnf + +# create user key +openssl genrsa -out "$OUTDIR"/"$USER_NAME".key 4096 + +# create CSR +openssl req -config "$OUTDIR"/"$USER_NAME"-csr.cnf -new -key "$OUTDIR"/"$USER_NAME".key -nodes -out "$OUTDIR"/"$USER_NAME".csr + +# encode the .csr file in base64 +BASE64_CSR=$(cat "$OUTDIR"/"$USER_NAME".csr | base64 | tr -d '\n') + +# create the CertificateSigninRequest +cat "$TEMPLATES_DIR"/csr.yaml.template | envsubst > "$OUTDIR"/csr.yaml + +kubectl apply -f "$OUTDIR"/csr.yaml + +# approve the certificate +sleep 2 +kubectl certificate approve "$USER_NAME"-csr + +### generate kubeconfig ### + +# Cluster Name (from the current context) +export CLUSTER_NAME=$(kubectl config view --minify -o jsonpath={.current-context}) +# Client certificate +export CLIENT_CERTIFICATE_DATA=$(kubectl get csr "$USER_NAME"-csr -o jsonpath='{.status.certificate}') +# Cluster Certificate Authority +export CLUSTER_CA=$(kubectl config view --raw -o json | jq -r '.clusters[] | select(.name == "'$(kubectl config current-context)'") | .cluster."certificate-authority-data"') +# API Server endpoint +export CLUSTER_ENDPOINT=$(kubectl config view --raw -o json | jq -r '.clusters[] | select(.name == "'$(kubectl config current-context)'") | .cluster."server"') + +echo "user: $USER_NAME, cluster name: $CLUSTER_NAME, client certificate (length): ${#CLIENT_CERTIFICATE_DATA}, Cluster Certificate Authority (length): ${#CLUSTER_CA}, API Server endpoint: $CLUSTER_ENDPOINT" + +cat "$TEMPLATES_DIR"/kubeconfig.template | envsubst > "$OUTDIR"/"$USER_NAME"-kubeconfig + +# add user private key +kubectl config set-credentials $USER_NAME 
--client-key="$OUTDIR"/"$USER_NAME".key --embed-certs=true --kubeconfig="$OUTDIR"/"$USER_NAME"-kubeconfig + +# confirm user access +kubectl version --kubeconfig="$OUTDIR"/"$USER_NAME"-kubeconfig \ No newline at end of file diff --git a/tests/resources/access_management/create_user/csr.cnf.template b/tests/resources/access_management/create_user/csr.cnf.template new file mode 100644 index 0000000..0091554 --- /dev/null +++ b/tests/resources/access_management/create_user/csr.cnf.template @@ -0,0 +1,15 @@ +[ req ] +default_bits = 2048 +prompt = no +default_md = sha256 +distinguished_name = dn + +[ dn ] +CN = ${USER_NAME} +O = ${GROUP_NAME} + +[ v3_ext ] +authorityKeyIdentifier=keyid,issuer:always +basicConstraints=CA:FALSE +keyUsage=keyEncipherment,dataEncipherment +extendedKeyUsage=clientAuth \ No newline at end of file diff --git a/tests/resources/access_management/create_user/csr.yaml.template b/tests/resources/access_management/create_user/csr.yaml.template new file mode 100644 index 0000000..dccd33c --- /dev/null +++ b/tests/resources/access_management/create_user/csr.yaml.template @@ -0,0 +1,14 @@ +apiVersion: certificates.k8s.io/v1 +kind: CertificateSigningRequest +metadata: + name: ${USER_NAME}-csr +spec: + signerName: kubernetes.io/kube-apiserver-client + + groups: + - system:authenticated + request: ${BASE64_CSR} + usages: + - client auth + - digital signature + - key encipherment diff --git a/tests/resources/access_management/create_user/kubeconfig.template b/tests/resources/access_management/create_user/kubeconfig.template new file mode 100644 index 0000000..b2935b2 --- /dev/null +++ b/tests/resources/access_management/create_user/kubeconfig.template @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Config +clusters: +- cluster: + certificate-authority-data: ${CLUSTER_CA} + server: ${CLUSTER_ENDPOINT} + name: ${CLUSTER_NAME} +users: +- name: ${USER_NAME} + user: + client-certificate-data: ${CLIENT_CERTIFICATE_DATA} +contexts: +- context: + cluster: ${CLUSTER_NAME} + user: ${USER_NAME} + name: ${USER_NAME}-${CLUSTER_NAME} +current-context: ${USER_NAME}-${CLUSTER_NAME} \ No newline at end of file diff --git a/tests/resources/kfp/Dockerfile b/tests/resources/kfp/Dockerfile new file mode 100644 index 0000000..fa12fec --- /dev/null +++ b/tests/resources/kfp/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.8.1 + +WORKDIR /app + +COPY requirements.txt . +RUN pip install -r requirements.txt --quiet --no-cache-dir + +COPY train.py . + +CMD ["python", "train.py"] diff --git a/tests/resources/kfp/__init__.py b/tests/resources/kfp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/resources/kfp/build_image.sh b/tests/resources/kfp/build_image.sh new file mode 100755 index 0000000..6ced22e --- /dev/null +++ b/tests/resources/kfp/build_image.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -eux + +IMAGE_NAME=kfp-test-img + +IMAGE_TAG=test-kfp + +FULL_IMAGE_NAME=${IMAGE_NAME}:${IMAGE_TAG} + +cd "$(dirname "$0")" + +docker build -t "$FULL_IMAGE_NAME" . 
+
+# load the image into the local "kind" cluster; CLUSTER_NAME must be set in the
+# environment (the tests load it from config.env before invoking this script)
+kind load docker-image "$FULL_IMAGE_NAME" --name "$CLUSTER_NAME"
+
+# to push the image to a remote repository instead
+#docker push "$FULL_IMAGE_NAME"
\ No newline at end of file
diff --git a/tests/resources/kfp/pipeline.yaml b/tests/resources/kfp/pipeline.yaml
new file mode 100644
index 0000000..5b3de83
--- /dev/null
+++ b/tests/resources/kfp/pipeline.yaml
@@ -0,0 +1,34 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: test-kubeflow-pipeline-
+  annotations: {pipelines.kubeflow.org/kfp_sdk_version: 1.8.10, pipelines.kubeflow.org/pipeline_compilation_time: '2022-09-24T16:47:23.603487',
+    pipelines.kubeflow.org/pipeline_spec: '{"description": "Pipeline to test an example
+      component", "name": "Test Kubeflow pipeline"}'}
+  labels: {pipelines.kubeflow.org/kfp_sdk_version: 1.8.10}
+spec:
+  entrypoint: test-kubeflow-pipeline
+  templates:
+  - name: kfp-example
+    container:
+      args: []
+      command: [python, train.py]
+      image: kfp-test-img:test-kfp
+    metadata:
+      labels:
+        pipelines.kubeflow.org/kfp_sdk_version: 1.8.10
+        pipelines.kubeflow.org/pipeline-sdk-type: kfp
+        pipelines.kubeflow.org/enable_caching: "true"
+      annotations: {pipelines.kubeflow.org/component_spec: '{"description": "Example
+        component using MLflow", "implementation": {"container": {"args": [], "command":
+        ["python", "train.py"], "image": "kfp-test-img:test-kfp"}}, "inputs": [],
+        "name": "KFP example", "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest":
+        "8c96d2df65ae29c70b3c82f839474c9399c5c18acaeba7898081d82262862cff", "url":
+        "component.yaml"}'}
+  - name: test-kubeflow-pipeline
+    dag:
+      tasks:
+      - {name: kfp-example, template: kfp-example}
+  arguments:
+    parameters: []
+  serviceAccountName: pipeline-runner
diff --git a/tests/resources/kfp/requirements.txt b/tests/resources/kfp/requirements.txt
new file mode 100644
index 0000000..a36e12c
--- /dev/null
+++ b/tests/resources/kfp/requirements.txt
@@ -0,0 +1,2 @@
+kfp~=1.8.12
+mlflow
diff --git a/tests/resources/kfp/train.py b/tests/resources/kfp/train.py
new file mode 100644
index 0000000..0d79c26
--- /dev/null
+++ b/tests/resources/kfp/train.py
@@ -0,0 +1,22 @@
+import mlflow
+
+MLFLOW_TRACKING_URI = "http://mlflow.mlflow.svc.cluster.local:5000"
+MLFLOW_EXPERIMENT_NAME = "Kubeflow Pipeline test run"
+
+
+def main():
+    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
+    experiment = mlflow.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)
+
+    if experiment is None:
+        experiment_id = mlflow.create_experiment(MLFLOW_EXPERIMENT_NAME)
+    else:
+        experiment_id = experiment.experiment_id
+
+    with mlflow.start_run(experiment_id=experiment_id) as run:
+        mlflow.log_param("my", "param")
+        mlflow.log_metric("score", 100)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/resources/registry/Dockerfile b/tests/resources/registry/Dockerfile
new file mode 100644
index 0000000..fa12fec
--- /dev/null
+++ b/tests/resources/registry/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.8.1
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install -r requirements.txt --quiet --no-cache-dir
+
+COPY train.py .
+ +CMD ["python", "train.py"] diff --git a/tests/resources/registry/build_push_image.sh b/tests/resources/registry/build_push_image.sh new file mode 100755 index 0000000..513de3c --- /dev/null +++ b/tests/resources/registry/build_push_image.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -eux + +HOST_IP=$1 + +REGISTRY="$HOST_IP:5001" +IMAGE_NAME="kfp-registry-test-image" +IMAGE_TAG="reg-test-kfp" + +FULL_IMAGE_NAME=${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG} + +cd "$(dirname "$0")" + +docker build -t "$FULL_IMAGE_NAME" . + +# load the image into the local "kind" cluster with name "kind-ep" +#kind load docker-image "$FULL_IMAGE_NAME" --name kind-ep + +# to push the image to a remote repository instead +docker push "$FULL_IMAGE_NAME" \ No newline at end of file diff --git a/tests/resources/registry/pipeline.yaml.template b/tests/resources/registry/pipeline.yaml.template new file mode 100644 index 0000000..ff61560 --- /dev/null +++ b/tests/resources/registry/pipeline.yaml.template @@ -0,0 +1,34 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: test-kubeflow-pipeline-registry + annotations: {pipelines.kubeflow.org/kfp_sdk_version: 1.8.10, pipelines.kubeflow.org/pipeline_compilation_time: '2022-09-24T16:47:23.603487', + pipelines.kubeflow.org/pipeline_spec: '{"description": "Pipeline to test an example + component (Registry)", "name": "Test Kubeflow pipeline (Registry)"}'} + labels: {pipelines.kubeflow.org/kfp_sdk_version: 1.8.10} +spec: + entrypoint: test-kubeflow-pipeline-registry + templates: + - name: kfp-example + container: + args: [] + command: [python, train.py] + image: ${HOST_IP}:5001/kfp-registry-test-image:reg-test-kfp + metadata: + labels: + pipelines.kubeflow.org/kfp_sdk_version: 1.8.10 + pipelines.kubeflow.org/pipeline-sdk-type: kfp + pipelines.kubeflow.org/enable_caching: "true" + annotations: {pipelines.kubeflow.org/component_spec: '{"description": "Example + component using MLflow", "implementation": {"container": {"args": [], "command": + ["python", "train.py"], "image": "${HOST_IP}:5001/kfp-registry-test-image:reg-test-kfp"}}, "inputs": [], + "name": "KFP example", "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": + "8c96d2df65ae29c70b3c82f839474c9399c5c18acaeba7898081d82262862cff", "url": + "component.yaml"}'} + - name: test-kubeflow-pipeline-registry + dag: + tasks: + - {name: kfp-example, template: kfp-example} + arguments: + parameters: [] + serviceAccountName: pipeline-runner diff --git a/tests/resources/registry/requirements.txt b/tests/resources/registry/requirements.txt new file mode 100644 index 0000000..793f682 --- /dev/null +++ b/tests/resources/registry/requirements.txt @@ -0,0 +1 @@ +kfp~=1.8.12 diff --git a/tests/resources/registry/train.py b/tests/resources/registry/train.py new file mode 100644 index 0000000..51d6325 --- /dev/null +++ b/tests/resources/registry/train.py @@ -0,0 +1,8 @@ + + +def main(): + print('Done') + + +if __name__ == '__main__': + main() diff --git a/tests/test_cluster_ready.py b/tests/test_cluster_ready.py new file mode 100644 index 0000000..9fd574e --- /dev/null +++ b/tests/test_cluster_ready.py @@ -0,0 +1,53 @@ +import subprocess +import logging +import pytest + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +CHECK_NAMESPACES = [ + # 'ingress-nginx', + 'mlflow', + 'kube-node-lease', + 'kube-public', + 'kube-system', + 'kubeflow', + 'local-path-storage' +] + + +def all_pods_ready(namespace: str): + output = subprocess.check_output(["kubectl", "get", "pods", 
"-n", namespace]) + + logger.info("\n" + output.decode()) + + for line in output.decode().strip().split('\n')[1:]: + + name, ready, status, restarts = line.split()[:4] + + if status != 'Completed' and (ready[0] == '0' or status != 'Running'): + logger.error(f"ERROR: Resources not ready (namespace={namespace}).") + return False + + logger.info(f"All resources are ready (namespace={namespace}).") + return True + + +def get_all_namespaces(): + out = subprocess.check_output(["kubectl", "get", "namespaces"]).decode() + all_namespaces = [n.split()[0] for n in out.strip().split('\n')[1:]] + return all_namespaces + + +@pytest.mark.order(2) +@pytest.mark.parametrize(argnames="namespace", argvalues=CHECK_NAMESPACES) +def test_namespaces_exists(namespace): + all_namespaces = get_all_namespaces() + assert namespace in all_namespaces, f"Namespace {namespace} doesn't exists." + + +@pytest.mark.order(1) +@pytest.mark.parametrize(argnames="namespace", argvalues=get_all_namespaces()) +def test_resources_ready(namespace: str): + assert all_pods_ready(namespace=namespace), "Some resources are not ready yet." diff --git a/tests/test_kfp.py b/tests/test_kfp.py new file mode 100644 index 0000000..066c02b --- /dev/null +++ b/tests/test_kfp.py @@ -0,0 +1,211 @@ +import subprocess +import logging +import pathlib +import time + +import kfp +import kfp_server_api +import pytest +import re +import requests +from urllib.parse import urlsplit + +from .conftest import CLUSTER_NAME + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +BUILD_FILE = pathlib.Path(__file__).parent / "resources" / "kfp" / "build_image.sh" +PIPELINE_FILE = pathlib.Path(__file__).parent / "resources" / "kfp" / "pipeline.yaml" + +IMAGE_NAME = "kfp-test-img" +EXPERIMENT_NAME = "Test Experiment" +KUBEFLOW_ENDPOINT = "http://localhost:8080" +KUBEFLOW_USERNAME = "user@example.com" +KUBEFLOW_PASSWORD = "12341234" +NAMESPACE = "kubeflow-user-example-com" + + +def get_istio_auth_session(url: str, username: str, password: str) -> dict: + """ + Determine if the specified URL is secured by Dex and try to obtain a session cookie. 
+
+    WARNING: only Dex `staticPasswords` and `LDAP` authentication are currently supported
+    (we default to using `staticPasswords` if both are enabled)
+
+    :param url: Kubeflow server URL, including protocol
+    :param username: Dex `staticPasswords` or `LDAP` username
+    :param password: Dex `staticPasswords` or `LDAP` password
+    :return: auth session information
+    """
+    # define the default return object
+    auth_session = {
+        "endpoint_url": url,    # KF endpoint URL
+        "redirect_url": None,   # KF redirect URL, if applicable
+        "dex_login_url": None,  # Dex login URL (for POST of credentials)
+        "is_secured": None,     # True if KF endpoint is secured
+        "session_cookie": None  # Resulting session cookies in the form "key1=value1; key2=value2"
+    }
+
+    # use a persistent session (for cookies)
+    with requests.Session() as s:
+
+        ################
+        # Determine if Endpoint is Secured
+        ################
+        resp = s.get(url, allow_redirects=True)
+        if resp.status_code != 200:
+            raise RuntimeError(
+                f"HTTP status code '{resp.status_code}' for GET against: {url}"
+            )
+
+        auth_session["redirect_url"] = resp.url
+
+        # if we were NOT redirected, then the endpoint is UNSECURED
+        if len(resp.history) == 0:
+            auth_session["is_secured"] = False
+            return auth_session
+        else:
+            auth_session["is_secured"] = True
+
+        ################
+        # Get Dex Login URL
+        ################
+        redirect_url_obj = urlsplit(auth_session["redirect_url"])
+
+        # if we are at the `/auth?=xxxx` path, we need to select an auth type
+        if re.search(r"/auth$", redirect_url_obj.path):
+
+            #######
+            # TIP: choose the default auth type by including ONE of the following
+            #######
+
+            # OPTION 1: set "staticPasswords" as default auth type
+            redirect_url_obj = redirect_url_obj._replace(
+                path=re.sub(r"/auth$", "/auth/local", redirect_url_obj.path)
+            )
+            # OPTION 2: set "ldap" as default auth type
+            # redirect_url_obj = redirect_url_obj._replace(
+            #     path=re.sub(r"/auth$", "/auth/ldap", redirect_url_obj.path)
+            # )
+
+        # if we are at the `/auth/xxxx/login` path, then no further action is needed (we can use it for login POST)
+        if re.search(r"/auth/.*/login$", redirect_url_obj.path):
+            auth_session["dex_login_url"] = redirect_url_obj.geturl()
+
+        # else, we need to be redirected to the actual login page
+        else:
+            # this GET should redirect us to the `/auth/xxxx/login` path
+            resp = s.get(redirect_url_obj.geturl(), allow_redirects=True)
+            if resp.status_code != 200:
+                raise RuntimeError(
+                    f"HTTP status code '{resp.status_code}' for GET against: {redirect_url_obj.geturl()}"
+                )
+
+            # set the login url
+            auth_session["dex_login_url"] = resp.url
+
+        ################
+        # Attempt Dex Login
+        ################
+        resp = s.post(
+            auth_session["dex_login_url"],
+            data={"login": username, "password": password},
+            allow_redirects=True
+        )
+        if len(resp.history) == 0:
+            raise RuntimeError(
+                f"Login credentials were probably invalid - "
+                f"No redirect after POST to: {auth_session['dex_login_url']}"
+            )
+
+        # store the session cookies in a "key1=value1; key2=value2" string
+        auth_session["session_cookie"] = "; ".join([f"{c.name}={c.value}" for c in s.cookies])
+
+    return auth_session
+
+
+def run_pipeline(pipeline_file: str, experiment_name: str):
+
+    with subprocess.Popen(["kubectl", "-n", "istio-system", "port-forward", "svc/istio-ingressgateway", "8080:80"], stdout=True) as proc:
+        try:
+            time.sleep(2)  # give some time to the port-forward connection
+
+            auth_session = get_istio_auth_session(
+                url=KUBEFLOW_ENDPOINT,
+                username=KUBEFLOW_USERNAME,
+                password=KUBEFLOW_PASSWORD
+            )
+
+            client = kfp.Client(
+                host=f"{KUBEFLOW_ENDPOINT}/pipeline",
+                cookies=auth_session["session_cookie"]
+            )
+
+            created_run = client.create_run_from_pipeline_package(
+                pipeline_file=pipeline_file,
+                enable_caching=False,
+                arguments={},
+                run_name="kfp_test_run",
+                experiment_name=experiment_name,
+                namespace=NAMESPACE
+            )
+
+            run_id = created_run.run_id
+
+            logger.info(f"Submitted run with ID: {run_id}")
+
+            logger.info(f"Waiting for run {run_id} to complete....")
+            run_detail = created_run.wait_for_run_completion()
+            _handle_job_end(run_detail)
+
+            # clean up
+            experiment = client.get_experiment(
+                experiment_name=experiment_name, namespace=NAMESPACE
+            )
+            client.delete_experiment(experiment.id)
+            logger.info("Done")
+
+        except Exception as e:
+            logger.error(f"ERROR: {e}")
+            raise e
+        finally:
+            proc.terminate()
+
+
+def _handle_job_end(run_detail: kfp_server_api.ApiRunDetail):
+    finished_run = run_detail.to_dict()["run"]
+
+    created_at = finished_run["created_at"]
+    finished_at = finished_run["finished_at"]
+
+    duration_secs = (finished_at - created_at).total_seconds()
+
+    status = finished_run["status"]
+
+    logger.info(f"Run finished in {round(duration_secs)} seconds with status: {status}")
+
+    if status != "Succeeded":
+        raise Exception(f"Run failed: {run_detail.run.id}")
+
+
+def build_load_image():
+    output = subprocess.check_output(
+        ["docker", "exec", f"{CLUSTER_NAME}-control-plane", "crictl", "images"]
+    )
+
+    if IMAGE_NAME in output.decode():
+        logging.info("Image already in cluster.")
+    else:
+        logging.info("Image not found in cluster. Building and loading image...")
+        subprocess.run([str(BUILD_FILE)], stdout=True)
+
+
+@pytest.mark.order(6)
+@pytest.mark.timeout(240)
+def test_run_pipeline():
+
+    # build the base docker image and load it into the cluster
+    build_load_image()
+
+    # submit and run pipeline
+    run_pipeline(pipeline_file=str(PIPELINE_FILE), experiment_name=EXPERIMENT_NAME)
diff --git a/tests/test_mlflow.py b/tests/test_mlflow.py
new file mode 100644
index 0000000..4a38322
--- /dev/null
+++ b/tests/test_mlflow.py
@@ -0,0 +1,185 @@
+import subprocess
+import time
+import mlflow
+import mlflow.sklearn
+import numpy as np
+import pandas as pd
+import pytest
+from mlflow.tracking import MlflowClient
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import ElasticNet
+import os
+from minio import Minio
+import uuid
+import logging
+
+from .conftest import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+MLFLOW_TRACKING_URI = "http://localhost:5000"
+
+MINIO_URI = "localhost:9000"
+BUCKET_NAME = 'mlflow'
+
+MLFLOW_EXPERIMENT_NAME = f"mlflow-minio-test-{str(uuid.uuid4())[:5]}"
+MODEL_NAME = "ElasticnetWineModel"
+
+os.environ['MLFLOW_S3_ENDPOINT_URL'] = 'http://localhost:9000/'
+os.environ['AWS_ACCESS_KEY_ID'] = AWS_ACCESS_KEY_ID
+os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_ACCESS_KEY
+
+
+@pytest.mark.order(3)
+def test_create_experiment():
+    with subprocess.Popen(["kubectl", "-n", "mlflow", "port-forward", "svc/mlflow", "5000:5000"], stdout=True) as proc:
+        try:
+            time.sleep(2)  # give some time to the port-forward connection
+            mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
+            client = MlflowClient()
+            rand_id = str(uuid.uuid4())[:5]
+            experiment_id = client.create_experiment(f"Test-{rand_id}")
+            logger.info(f"Experiment id: {experiment_id}")
+            client.delete_experiment(experiment_id)
+            logger.info("Done")
+        except Exception as e:
+            logger.error(f"ERROR: {e}")
+            raise e
+        finally:
+            proc.terminate()
+
+
+@pytest.mark.order(4)
+def test_minio_create_bucket():
+
+    bucket_name = f"test-{str(uuid.uuid4())[:5]}"
+
+    client = Minio(
+        MINIO_URI,
+        access_key=AWS_ACCESS_KEY_ID,
+        secret_key=AWS_SECRET_ACCESS_KEY,
+        secure=False,
+    )
+
+    with subprocess.Popen(["kubectl", "-n", "mlflow", "port-forward", "svc/mlflow-minio-service", "9000:9000"], stdout=True) as proc:
+        try:
+            time.sleep(2)  # give some time to the port-forward connection
+            client.make_bucket(bucket_name)
+            client.remove_bucket(bucket_name)
+            logger.info("Done")
+        except Exception as e:
+            logger.error(f"ERROR: {e}")
+            raise e
+        finally:
+            proc.terminate()
+
+
+def eval_metrics(actual, pred):
+    rmse = np.sqrt(mean_squared_error(actual, pred))
+    mae = mean_absolute_error(actual, pred)
+    r2 = r2_score(actual, pred)
+    return rmse, mae, r2
+
+
+def clean_up():
+    logger.info("Cleaning up the registered model and its artifacts")
+    client = MlflowClient()
+    experiment = client.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)
+    experiment_id = experiment.experiment_id
+
+    # experiment deletion is disabled; only the model and its artifacts are removed
+    # client.delete_experiment(experiment_id)
+
+    client.delete_registered_model(MODEL_NAME)
+
+    logger.info("Cleaning artifacts")
+    minioClient = Minio(
+        MINIO_URI,
+        access_key=AWS_ACCESS_KEY_ID,
+        secret_key=AWS_SECRET_ACCESS_KEY,
+        secure=False,
+    )
+    objects_to_delete = minioClient.list_objects(
+        bucket_name=BUCKET_NAME, prefix=experiment_id, recursive=True
+    )
+
+    for obj in objects_to_delete:
+        logger.info(f"Deleting artifact: {obj.object_name}")
+        minioClient.remove_object(bucket_name=BUCKET_NAME, object_name=obj.object_name)
+
+
+def run_experiment():
+
+    np.random.seed(40)
+
+    # Read the wine-quality csv file from the URL
+    csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
+
+    data = pd.read_csv(csv_url, sep=";")
+
+    # Split the data into training and test sets. (0.75, 0.25) split.
+    train, test = train_test_split(data)
+
+    # The predicted column is "quality" which is a scalar from [3, 9]
+    train_x = train.drop(["quality"], axis=1)
+    test_x = test.drop(["quality"], axis=1)
+    train_y = train[["quality"]]
+    test_y = test[["quality"]]
+
+    alpha = 0.5
+    l1_ratio = 0.5
+
+    logger.info(f"Using MLflow tracking URI: {MLFLOW_TRACKING_URI}")
+    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
+
+    logger.info(f"Using MLflow experiment: {MLFLOW_EXPERIMENT_NAME}")
+    mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)
+
+    with mlflow.start_run():
+        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
+
+        logger.info("Fitting model...")
+
+        lr.fit(train_x, train_y)
+
+        logger.info("Finished fitting")
+
+        predicted_qualities = lr.predict(test_x)
+
+        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
+
+        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
+        print(" RMSE: %s" % rmse)
+        print(" MAE: %s" % mae)
+        print(" R2: %s" % r2)
+
+        logger.info("Logging parameters to MLflow")
+        mlflow.log_param("alpha", alpha)
+        mlflow.log_param("l1_ratio", l1_ratio)
+        mlflow.log_metric("rmse", rmse)
+        mlflow.log_metric("r2", r2)
+        mlflow.log_metric("mae", mae)
+
+        logger.info("Logging trained model")
+        mlflow.sklearn.log_model(lr, "model", registered_model_name=MODEL_NAME)
+
+    # clean up experiment and artifacts
+    clean_up()
+
+
+@pytest.mark.order(5)
+def test_mlflow_end_to_end():
+
+    with subprocess.Popen(["kubectl", "-n", "mlflow", "port-forward", "svc/mlflow", "5000:5000"], stdout=False) as proc1:
+        with subprocess.Popen(["kubectl", "-n", "mlflow", "port-forward", "svc/mlflow-minio-service", "9000:9000"], stdout=False) as proc2:
+            try:
+                time.sleep(2)  # give some time to the port-forward connection
+                run_experiment()
+                logger.info("Done")
+            except Exception as e:
+                logger.error(f"ERROR: {e}")
+                raise e
+            finally:
+                proc1.terminate()
+                proc2.terminate()
diff --git a/tests/test_registry.py b/tests/test_registry.py
new file mode 100644
index 0000000..e13cf83
--- /dev/null
+++ b/tests/test_registry.py
@@ -0,0 +1,60 @@
+import subprocess
+import logging
+import pathlib
+import pytest
+import os
+from envsubst import envsubst
+
+from .conftest import HOST_IP
+from .test_kfp import run_pipeline
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+BUILD_FILE = pathlib.Path(__file__).parent / "resources" / "registry" / "build_push_image.sh"
+PIPELINE_TEMPLATE = pathlib.Path(__file__).parent / "resources" / "registry" / "pipeline.yaml.template"
+
+IMAGE_NAME = "kfp-registry-test-image"
+EXPERIMENT_NAME = "Test Experiment (Registry)"
+
+
+def build_push_image():
+    logging.info("Building and pushing image to local registry...")
+    subprocess.run([str(BUILD_FILE), HOST_IP], stdout=True)
+
+
+def render_pipeline_yaml(output: str):
+    """Use the pipeline.yaml.template to create the final pipeline.yaml with the
+    correct registry IP by replacing the "${HOST_IP}" placeholder."""
+    with open(PIPELINE_TEMPLATE, "r") as f_tpl:
+        with open(output, "w") as f_out:
+            f_out.write(envsubst(f_tpl.read()))
+
+
+@pytest.mark.order(7)
+@pytest.mark.skipif(
+    os.environ.get('INSTALL_LOCAL_REGISTRY') == 'false',
+    reason="No local image registry was installed."
+)
+def test_push_image():
+    # build the base docker image and push it to the local registry
+    build_push_image()
+
+
+@pytest.mark.order(8)
+@pytest.mark.timeout(120)
+@pytest.mark.skipif(
+    os.environ.get('INSTALL_LOCAL_REGISTRY') == 'false',
+    reason="No local image registry was installed."
+)
+def test_run_pipeline_using_registry(tmp_path):
+
+    # build the base docker image and push it to the local registry
+    build_push_image()
+
+    # create pipeline.yaml with the right registry IP address
+    pipeline_file = tmp_path / "pipeline.yaml"
+    render_pipeline_yaml(output=str(pipeline_file))
+
+    # submit and run pipeline
+    run_pipeline(pipeline_file=str(pipeline_file), experiment_name=EXPERIMENT_NAME)
diff --git a/tests/wait_deployment_ready.py b/tests/wait_deployment_ready.py
new file mode 100644
index 0000000..51d2d6e
--- /dev/null
+++ b/tests/wait_deployment_ready.py
@@ -0,0 +1,87 @@
+import subprocess
+import logging
+import time
+import pathlib
+from dotenv import load_dotenv
+import os
+
+import argparse
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "-t", "--timeout", help="Maximum time to wait, in minutes.", required=False, type=float
+)
+
+ENV_FILE = pathlib.Path(__file__).parent.parent / "config.env"
+load_dotenv(dotenv_path=ENV_FILE)
+
+CLUSTER_NAME = os.getenv("CLUSTER_NAME")
+assert CLUSTER_NAME is not None
+CONTEXT_NAME = f"kind-{CLUSTER_NAME}"
+
+subprocess.run(["kubectl", "config", "use-context", CONTEXT_NAME], stdout=True)
+
+
+def all_pods_ready(namespace: str):
+    output = subprocess.check_output(["kubectl", "get", "pods", "-n", namespace])
+
+    logger.info("\n" + output.decode())
+
+    for line in output.decode().strip().split("\n")[1:]:
+
+        name, ready, status, restarts = line.split()[:4]
+
+        # skip this pod, which is always down
+        if name.startswith("proxy-agent") and namespace == "kubeflow":
+            continue
+
+        if status != "Completed" and (ready[0] == "0" or status != "Running"):
+            logger.info(f"Resources not ready (namespace={namespace}).")
+            return False
+
+    logger.info(f"All resources are ready (namespace={namespace}).")
+    return True
+
+
+def get_all_namespaces():
+    out = subprocess.check_output(["kubectl", "get", "namespaces"]).decode()
+    all_namespaces = [n.split()[0] for n in out.strip().split("\n")[1:]]
+    return all_namespaces
+
+
+def wait_deployment_ready(timeout: float = None):
+
+    start_time = time.time()
+
+    namespaces = get_all_namespaces()
+    namespaces = [{"name": name, "ready": False} for name in namespaces]
+
+    all_ready = False
+
+    while not all_ready:
+
+        for namespace in namespaces:
+            if not namespace["ready"]:
+                namespace["ready"] = all_pods_ready(namespace=namespace["name"])
+
+        all_ready = all([namespace["ready"] for namespace in namespaces])
+
+        if all_ready:
+            logger.info("Cluster ready!")
+            break
+        else:
+            if timeout and time.time() - start_time > timeout * 60:
+                raise TimeoutError(f"Cluster not ready after {timeout} minutes.")
+            else:
+                logger.info("Waiting for resources...")
+                time.sleep(10)
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    logger.info(vars(args))
+
+    wait_deployment_ready(args.timeout)
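
Running the suite locally, a minimal sketch (assumptions: the kind cluster defined in config.env is up and reachable with kubectl, and Python >= 3.8 is available; conftest.py switches kubectl to the kind-${CLUSTER_NAME} context at session start):

    pip install -r tests/requirements-tests.txt
    # block until all pods in all namespaces are ready; -t/--timeout is in minutes
    python tests/wait_deployment_ready.py --timeout 30
    # pytest-order runs the tests in the sequence marked with @pytest.mark.order(n)
    pytest tests/ -v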