From 5e8c5f87b844bae3a26d920f43a504de37b051b1 Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Thu, 7 Jul 2022 17:22:43 -0700
Subject: [PATCH 1/8] feat: update from_local_script()

---
 google/cloud/aiplatform/jobs.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index c3e0459aac..fecb9baa7f 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1389,6 +1389,11 @@ def from_local_script(
                     "image_uri": reduction_server_container_uri,
                 }
             else:
+            ## check if the container is pre-built
+            elif (
+                ("docker.pkg.dev/vertex-ai/" in container_uri)
+                or ("gcr.io/cloud-aiplatform/" in container_uri)
+            ):
                 spec["python_package_spec"] = {
                     "executor_image_uri": container_uri,
                     "python_module": python_packager.module_name,
@@ -1403,6 +1408,29 @@ def from_local_script(
                         {"name": key, "value": value}
                         for key, value in environment_variables.items()
                     ]
+            else:
+                command = [
+                    "sh",
+                    "-c",
+                    "\npip3 install -q --user --upgrade --no-warn-script-location gsutil" +
+                    f"\ngsutil -q cp {package_gcs_uri} ." +
+                    f"\npip3 install -q --user {package_gcs_uri[len(staging_bucket)+1:]}" +
+                    f"\npython3 -m {python_packager.module_name}"
+                ]
+
+                spec["container_spec"] = {
+                    "image_uri": container_uri,
+                    "command": command,
+                }
+
+                if args:
+                    spec["container_spec"]["args"] = args
+
+                if environment_variables:
+                    spec["container_spec"]["env"] = [
+                        {"name": key, "value": value}
+                        for key, value in environment_variables.items()
+                    ]
 
         return cls(
             display_name=display_name,

From 59c57d829f6dcde1d6dc6805c88ba5a1066237ae Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Thu, 7 Jul 2022 17:29:45 -0700
Subject: [PATCH 2/8] fix from_local_script()

---
 google/cloud/aiplatform/jobs.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index fecb9baa7f..0fbb0e55a5 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1388,7 +1388,6 @@ def from_local_script(
                 spec["container_spec"] = {
                     "image_uri": reduction_server_container_uri,
                 }
-            else:
             ## check if the container is pre-built
             elif (
                 ("docker.pkg.dev/vertex-ai/" in container_uri)
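Note: to make the bootstrap added in PATCH 1 concrete, the sketch below expands the generated command for one assumed example (none of these values come from the patches): staging_bucket = "gs://my-bucket", package_gcs_uri = "gs://my-bucket/trainer.tar.gz", and a packaged module named "trainer.task". The slice package_gcs_uri[len(staging_bucket)+1:] strips the bucket prefix and leaves "trainer.tar.gz".

    # Illustrative expansion only; every concrete value here is assumed.
    command = [
        "sh",
        "-c",
        "\npip3 install -q --user --upgrade --no-warn-script-location gsutil"
        "\ngsutil -q cp gs://my-bucket/trainer.tar.gz ."
        "\npip3 install -q --user trainer.tar.gz"
        "\npython3 -m trainer.task",
    ]

In words: the custom container first installs gsutil, copies the auto-packaged training script out of the staging bucket, pip-installs it, and then runs its entry module.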
From 4c590c61245cbea53d10a53d86db7371b00390a6 Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Fri, 8 Jul 2022 12:47:21 -0700
Subject: [PATCH 3/8] fix: update unit tests

---
 google/cloud/aiplatform/jobs.py             |  13 +-
 tests/unit/aiplatform/test_custom_job.py    | 201 ++++++++++++++++----
 tests/unit/aiplatform/test_training_jobs.py |   2 +-
 3 files changed, 166 insertions(+), 50 deletions(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 0fbb0e55a5..48e4d8546c 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1389,9 +1389,8 @@ def from_local_script(
                     "image_uri": reduction_server_container_uri,
                 }
             ## check if the container is pre-built
-            elif (
-                ("docker.pkg.dev/vertex-ai/" in container_uri)
-                or ("gcr.io/cloud-aiplatform/" in container_uri)
+            elif ("docker.pkg.dev/vertex-ai/" in container_uri) or (
+                "gcr.io/cloud-aiplatform/" in container_uri
             ):
                 spec["python_package_spec"] = {
                     "executor_image_uri": container_uri,
@@ -1411,10 +1410,10 @@ def from_local_script(
                 command = [
                     "sh",
                     "-c",
-                    "\npip3 install -q --user --upgrade --no-warn-script-location gsutil" +
-                    f"\ngsutil -q cp {package_gcs_uri} ." +
-                    f"\npip3 install -q --user {package_gcs_uri[len(staging_bucket)+1:]}" +
-                    f"\npython3 -m {python_packager.module_name}"
+                    "\npip3 install -q --user --upgrade --no-warn-script-location gsutil"
+                    + f"\ngsutil -q cp {package_gcs_uri} ."
+                    + f"\npip3 install -q --user {package_gcs_uri[len(staging_bucket)+1:]}"
+                    + f"\npython3 -m {python_packager.module_name}",
                 ]
 
                 spec["container_spec"] = {
diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py
index c09df26ad2..10289b7b84 100644
--- a/tests/unit/aiplatform/test_custom_job.py
+++ b/tests/unit/aiplatform/test_custom_job.py
@@ -50,6 +50,7 @@
 _TEST_ENABLE_WEB_ACCESS = True
 _TEST_WEB_ACCESS_URIS = {"workerpool0-0": "uri"}
 _TEST_TRAINING_CONTAINER_IMAGE = "gcr.io/test-training/container:image"
+_TEST_PREBUILT_CONTAINER_IMAGE = "gcr.io/cloud-aiplatform/container:image"
 
 _TEST_RUN_ARGS = ["-v", "0.1", "--test=arg"]
 
@@ -70,6 +71,24 @@
     }
 ]
 
+_TEST_PYTHON_PACKAGE_SPEC = gca_custom_job_compat.PythonPackageSpec(
+    executor_image_uri=_TEST_PREBUILT_CONTAINER_IMAGE,
+    package_uris=[test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH],
+    python_module=test_training_jobs._TEST_MODULE_NAME,
+)
+
+_TEST_CONTAINER_SPEC = gca_custom_job_compat.ContainerSpec(
+    image_uri=_TEST_TRAINING_CONTAINER_IMAGE,
+    command=[
+        "sh",
+        "-c",
+        "\npip3 install -q --user --upgrade --no-warn-script-location gsutil"
+        + f"\ngsutil -q cp {test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH} ."
+        + f"\npip3 install -q --user trainer.tar.gz"
+        + f"\npython3 -m {test_training_jobs._TEST_MODULE_NAME}",
+    ],
+)
+
 _TEST_STAGING_BUCKET = "gs://test-staging-bucket"
 _TEST_BASE_OUTPUT_DIR = f"{_TEST_STAGING_BUCKET}/{_TEST_DISPLAY_NAME}"
@@ -532,7 +551,41 @@ def test_get_custom_job(self, get_custom_job_mock):
 
     @pytest.mark.usefixtures("mock_python_package_to_gcs")
     @pytest.mark.parametrize("sync", [True, False])
-    def test_create_from_local_script(
+    def test_create_from_local_script_prebuilt_container(
+        self, get_custom_job_mock, create_custom_job_mock, sync
+    ):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+            staging_bucket=_TEST_STAGING_BUCKET,
+            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
+        )
+
+        # configuration on this is tested in test_training_jobs.py
+        job = aiplatform.CustomJob.from_local_script(
+            display_name=_TEST_DISPLAY_NAME,
+            script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
+            container_uri=_TEST_PREBUILT_CONTAINER_IMAGE,
+            base_output_dir=_TEST_BASE_OUTPUT_DIR,
+            labels=_TEST_LABELS,
+        )
+
+        assert (
+            job.job_spec.worker_pool_specs[0].python_package_spec
+            == _TEST_PYTHON_PACKAGE_SPEC
+        )
+
+        job.run(sync=sync)
+
+        job.wait()
+
+        assert (
+            job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
+        )
+
+    @pytest.mark.usefixtures("mock_python_package_to_gcs")
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_create_from_local_script_custom_container(
         self, get_custom_job_mock, create_custom_job_mock, sync
     ):
         aiplatform.init(
@@ -551,6 +604,8 @@ def test_create_from_local_script(
             labels=_TEST_LABELS,
         )
 
+        assert job.job_spec.worker_pool_specs[0].container_spec == _TEST_CONTAINER_SPEC
+
         job.run(sync=sync)
 
         job.wait()
@@ -579,6 +634,109 @@ def test_create_from_local_script_raises_with_no_staging_bucket(
                 container_uri=_TEST_TRAINING_CONTAINER_IMAGE,
             )
 
+    @pytest.mark.usefixtures("mock_python_package_to_gcs")
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_create_from_local_script_prebuilt_container_with_all_args(
+        self, get_custom_job_mock, create_custom_job_mock, sync
+    ):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+            staging_bucket=_TEST_STAGING_BUCKET,
+            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
+        )
+
+        # configuration on this is tested in test_training_jobs.py
+        job = aiplatform.CustomJob.from_local_script(
+            display_name=_TEST_DISPLAY_NAME,
+            script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
+            container_uri=_TEST_PREBUILT_CONTAINER_IMAGE,
+            args=_TEST_RUN_ARGS,
+            requirements=test_training_jobs._TEST_REQUIREMENTS,
+            environment_variables=test_training_jobs._TEST_ENVIRONMENT_VARIABLES,
+            replica_count=test_training_jobs._TEST_REPLICA_COUNT,
+            machine_type=test_training_jobs._TEST_MACHINE_TYPE,
+            accelerator_type=test_training_jobs._TEST_ACCELERATOR_TYPE,
+            accelerator_count=test_training_jobs._TEST_ACCELERATOR_COUNT,
+            boot_disk_type=test_training_jobs._TEST_BOOT_DISK_TYPE,
+            boot_disk_size_gb=test_training_jobs._TEST_BOOT_DISK_SIZE_GB,
+            reduction_server_replica_count=test_training_jobs._TEST_REDUCTION_SERVER_REPLICA_COUNT,
+            reduction_server_machine_type=test_training_jobs._TEST_REDUCTION_SERVER_MACHINE_TYPE,
+            reduction_server_container_uri=test_training_jobs._TEST_REDUCTION_SERVER_CONTAINER_URI,
+            base_output_dir=_TEST_BASE_OUTPUT_DIR,
+            labels=_TEST_LABELS,
+        )
+
+        expected_python_package_spec = _TEST_PYTHON_PACKAGE_SPEC
+        expected_python_package_spec.args = _TEST_RUN_ARGS
+        expected_python_package_spec.env = [
+            {"name": key, "value": value}
+            for key, value in test_training_jobs._TEST_ENVIRONMENT_VARIABLES.items()
+        ]
+
+        assert (
+            job.job_spec.worker_pool_specs[0].python_package_spec
+            == expected_python_package_spec
+        )
+        job.run(sync=sync)
+
+        job.wait()
+
+        assert (
+            job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
+        )
+
+    @pytest.mark.usefixtures("mock_python_package_to_gcs")
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_create_from_local_script_custom_container_with_all_args(
+        self, get_custom_job_mock, create_custom_job_mock, sync
+    ):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+            staging_bucket=_TEST_STAGING_BUCKET,
+            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
+        )
+
+        # configuration on this is tested in test_training_jobs.py
+        job = aiplatform.CustomJob.from_local_script(
+            display_name=_TEST_DISPLAY_NAME,
+            script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
+            container_uri=_TEST_TRAINING_CONTAINER_IMAGE,
+            args=_TEST_RUN_ARGS,
+            requirements=test_training_jobs._TEST_REQUIREMENTS,
+            environment_variables=test_training_jobs._TEST_ENVIRONMENT_VARIABLES,
+            replica_count=test_training_jobs._TEST_REPLICA_COUNT,
+            machine_type=test_training_jobs._TEST_MACHINE_TYPE,
+            accelerator_type=test_training_jobs._TEST_ACCELERATOR_TYPE,
+            accelerator_count=test_training_jobs._TEST_ACCELERATOR_COUNT,
+            boot_disk_type=test_training_jobs._TEST_BOOT_DISK_TYPE,
+            boot_disk_size_gb=test_training_jobs._TEST_BOOT_DISK_SIZE_GB,
+            reduction_server_replica_count=test_training_jobs._TEST_REDUCTION_SERVER_REPLICA_COUNT,
+            reduction_server_machine_type=test_training_jobs._TEST_REDUCTION_SERVER_MACHINE_TYPE,
+            reduction_server_container_uri=test_training_jobs._TEST_REDUCTION_SERVER_CONTAINER_URI,
+            base_output_dir=_TEST_BASE_OUTPUT_DIR,
+            labels=_TEST_LABELS,
+        )
+
+        expected_container_spec = _TEST_CONTAINER_SPEC
+        expected_container_spec.args = _TEST_RUN_ARGS
+        expected_container_spec.env = [
+            {"name": key, "value": value}
+            for key, value in test_training_jobs._TEST_ENVIRONMENT_VARIABLES.items()
+        ]
+
+        assert (
+            job.job_spec.worker_pool_specs[0].container_spec == expected_container_spec
+        )
+        job.run(sync=sync)
+
+        job.wait()
+
+        assert (
+            job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
+        )
+
     @pytest.mark.parametrize("sync", [True, False])
     def test_create_custom_job_with_enable_web_access(
         self,
@@ -722,47 +880,6 @@ def test_create_custom_job_without_base_output_dir(
             f"{_TEST_STAGING_BUCKET}/aiplatform-custom-job"
         )
 
-    @pytest.mark.usefixtures("mock_python_package_to_gcs")
-    @pytest.mark.parametrize("sync", [True, False])
-    def test_create_from_local_script_with_all_args(
-        self, get_custom_job_mock, create_custom_job_mock, sync
-    ):
-        aiplatform.init(
-            project=_TEST_PROJECT,
-            location=_TEST_LOCATION,
-            staging_bucket=_TEST_STAGING_BUCKET,
-            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
-        )
-
-        # configuration on this is tested in test_training_jobs.py
-        job = aiplatform.CustomJob.from_local_script(
-            display_name=_TEST_DISPLAY_NAME,
-            script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
-            container_uri=_TEST_TRAINING_CONTAINER_IMAGE,
-            args=_TEST_RUN_ARGS,
-            requirements=test_training_jobs._TEST_REQUIREMENTS,
-            environment_variables=test_training_jobs._TEST_ENVIRONMENT_VARIABLES,
-            replica_count=test_training_jobs._TEST_REPLICA_COUNT,
-            machine_type=test_training_jobs._TEST_MACHINE_TYPE,
-            accelerator_type=test_training_jobs._TEST_ACCELERATOR_TYPE,
-            accelerator_count=test_training_jobs._TEST_ACCELERATOR_COUNT,
-            boot_disk_type=test_training_jobs._TEST_BOOT_DISK_TYPE,
-            boot_disk_size_gb=test_training_jobs._TEST_BOOT_DISK_SIZE_GB,
-            reduction_server_replica_count=test_training_jobs._TEST_REDUCTION_SERVER_REPLICA_COUNT,
-            reduction_server_machine_type=test_training_jobs._TEST_REDUCTION_SERVER_MACHINE_TYPE,
-            reduction_server_container_uri=test_training_jobs._TEST_REDUCTION_SERVER_CONTAINER_URI,
-            base_output_dir=_TEST_BASE_OUTPUT_DIR,
-            labels=_TEST_LABELS,
-        )
-
-        job.run(sync=sync)
-
-        job.wait()
-
-        assert (
-            job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
-        )
-
     @pytest.mark.usefixtures("get_custom_job_mock", "create_custom_job_mock")
     def test_check_custom_job_availability(self):
         aiplatform.init(
diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py
index 8e474e1edb..fdadb2a1b1 100644
--- a/tests/unit/aiplatform/test_training_jobs.py
+++ b/tests/unit/aiplatform/test_training_jobs.py
@@ -159,7 +159,7 @@
 _TEST_MODEL_SERVING_CONTAINER_PORTS = [8888, 10000]
 _TEST_MODEL_DESCRIPTION = "test description"
 
-_TEST_OUTPUT_PYTHON_PACKAGE_PATH = "gs://test/ouput/python/trainer.tar.gz"
+_TEST_OUTPUT_PYTHON_PACKAGE_PATH = "gs://test-staging-bucket/trainer.tar.gz"
 
 _TEST_PYTHON_MODULE_NAME = "aiplatform.task"
 _TEST_MODEL_NAME = f"projects/{_TEST_PROJECT}/locations/us-central1/models/{_TEST_ID}"
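Note: the elif reformatted in PATCH 3 is the branch point between the two job shapes: pre-built Vertex AI training images go through python_package_spec, everything else through container_spec. A minimal sketch of the check, with assumed example URIs (the helper name is hypothetical, not from the patches):

    def _is_prebuilt(container_uri: str) -> bool:
        # Same substring test as the patched code above.
        return ("docker.pkg.dev/vertex-ai/" in container_uri) or (
            "gcr.io/cloud-aiplatform/" in container_uri
        )

    assert _is_prebuilt("gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest")
    assert not _is_prebuilt("gcr.io/my-project/my-trainer:latest")  # custom image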
From 5399cb1e1979105ce3e06751904315f2804cfad3 Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Fri, 8 Jul 2022 13:21:29 -0700
Subject: [PATCH 4/8] fix lint failed

---
 tests/unit/aiplatform/test_custom_job.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py
index 10289b7b84..8dc7572d64 100644
--- a/tests/unit/aiplatform/test_custom_job.py
+++ b/tests/unit/aiplatform/test_custom_job.py
@@ -84,7 +84,7 @@
         "-c",
         "\npip3 install -q --user --upgrade --no-warn-script-location gsutil"
         + f"\ngsutil -q cp {test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH} ."
-        + f"\npip3 install -q --user trainer.tar.gz"
+        + "\npip3 install -q --user trainer.tar.gz"
         + f"\npython3 -m {test_training_jobs._TEST_MODULE_NAME}",
     ],
 )
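Note: the unit tests can hard-code the literal "trainer.tar.gz" (the string PATCH 4 just de-f-stringed) because, with _TEST_OUTPUT_PYTHON_PACKAGE_PATH now rooted directly in the staging bucket, the bucket-relative slice used by from_local_script() reduces to the bare file name:

    staging_bucket = "gs://test-staging-bucket"
    package_gcs_uri = "gs://test-staging-bucket/trainer.tar.gz"
    assert package_gcs_uri[len(staging_bucket) + 1 :] == "trainer.tar.gz"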
From 0a043ef2144cbd045664ea61c5eafc665ae414af Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Mon, 11 Jul 2022 15:19:03 -0700
Subject: [PATCH 5/8] fix: add system test

---
 google/cloud/aiplatform/jobs.py            |  9 +-
 tests/system/aiplatform/test_custom_job.py | 84 +++++++++++++++++++
 .../test_resources/custom_job_script.py    | 18 ++++
 3 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100644 tests/system/aiplatform/test_custom_job.py
 create mode 100644 tests/system/aiplatform/test_resources/custom_job_script.py

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 48e4d8546c..4b4366f8de 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1265,7 +1265,12 @@ def from_local_script(
             script_path (str): Required. Local path to training script.
             container_uri (str):
-                Required: Uri of the training container image to use for custom job.
+                Required. Uri of the training container image to use for custom job.
+                Support images in Artifact Registry, Container Registry, or Docker Hub.
+                Vertex AI provides a wide range of executor images with pre-installed
+                packages to meet users' various use cases. See the list of `pre-built containers
+                for training <https://cloud.google.com/vertex-ai/docs/training/pre-built-containers>`_.
+                If not using image from this list, please make sure python3 and pip3 are installed in your container.
             args (Optional[Sequence[str]]):
                 Optional. Command line arguments to be passed to the Python task.
             requirements (Sequence[str]):
@@ -1410,7 +1415,7 @@ def from_local_script(
                 command = [
                     "sh",
                     "-c",
-                    "\npip3 install -q --user --upgrade --no-warn-script-location gsutil"
+                    "\npip3 install -q --upgrade --no-warn-script-location gsutil"
                     + f"\ngsutil -q cp {package_gcs_uri} ."
                     + f"\npip3 install -q --user {package_gcs_uri[len(staging_bucket)+1:]}"
                     + f"\npython3 -m {python_packager.module_name}",
diff --git a/tests/system/aiplatform/test_custom_job.py b/tests/system/aiplatform/test_custom_job.py
new file mode 100644
index 0000000000..34664902f7
--- /dev/null
+++ b/tests/system/aiplatform/test_custom_job.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+import pytest
+
+from google.cloud import aiplatform
+from google.cloud.aiplatform.compat.types import job_state as gca_job_state
+from tests.system.aiplatform import e2e_base
+
+_PREBUILT_CONTAINER_IMAGE = "gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest"
+_CUSTOM_CONTAINER_IMAGE = "gcr.io/deeplearning-platform-release/base-cpu"
+
+_DIR_NAME = os.path.dirname(os.path.abspath(__file__))
+_LOCAL_TRAINING_SCRIPT_PATH = os.path.join(
+    _DIR_NAME, "test_resources/custom_job_script.py"
+)
+
+
+@pytest.mark.usefixtures(
+    "prepare_staging_bucket", "delete_staging_bucket", "tear_down_resources"
+)
+class TestCustomJob(e2e_base.TestEndToEnd):
+
+    _temp_prefix = "temp-vertex-sdk-custom-job"
+
+    def test_from_local_script_prebuilt_container(self, shared_state):
+        shared_state["resources"] = []
+
+        aiplatform.init(
+            project=e2e_base._PROJECT,
+            location=e2e_base._LOCATION,
+            staging_bucket=shared_state["staging_bucket_name"],
+        )
+
+        display_name = self._make_display_name("custom-job")
+
+        custom_job = aiplatform.CustomJob.from_local_script(
+            display_name=display_name,
+            script_path=_LOCAL_TRAINING_SCRIPT_PATH,
+            container_uri=_PREBUILT_CONTAINER_IMAGE,
+        )
+        custom_job.run()
+
+        shared_state["resources"].append(custom_job)
+
+        assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED
+
+    def test_from_local_script_custom_container(self, shared_state):
+        shared_state["resources"] = []
+
+        aiplatform.init(
+            project=e2e_base._PROJECT,
+            location=e2e_base._LOCATION,
+            staging_bucket=shared_state["staging_bucket_name"],
+        )
+
+        display_name = self._make_display_name("custom-job")
+
+        custom_job = aiplatform.CustomJob.from_local_script(
+            display_name=display_name,
+            script_path=_LOCAL_TRAINING_SCRIPT_PATH,
+            container_uri=_CUSTOM_CONTAINER_IMAGE,
+        )
+        custom_job.run()
+
+        shared_state["resources"].append(custom_job)
+
+        assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED
diff --git a/tests/system/aiplatform/test_resources/custom_job_script.py b/tests/system/aiplatform/test_resources/custom_job_script.py
new file mode 100644
index 0000000000..744f7bb3c3
--- /dev/null
+++ b/tests/system/aiplatform/test_resources/custom_job_script.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+print("Test CustomJob script.")
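Note: after PATCH 5, from_local_script() emits one of two worker-pool shapes, which the new system tests exercise end to end. A rough sketch of both, with illustrative values only (module name and staging path are assumed):

    # Pre-built Vertex AI training image -> python_package_spec
    python_package_spec = {
        "executor_image_uri": "gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
        "python_module": "trainer.task",  # assumed module name
        "package_uris": ["gs://my-bucket/trainer.tar.gz"],  # assumed staging path
    }

    # Any other image -> container_spec wrapping the shell bootstrap
    container_spec = {
        "image_uri": "gcr.io/deeplearning-platform-release/base-cpu",
        "command": ["sh", "-c", "..."],  # bootstrap built in the diffs above
    }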
From 0e75f2e87a48510c0c8ebcd031b67a3fc96d5f35 Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Mon, 11 Jul 2022 21:39:49 -0700
Subject: [PATCH 6/8] fix: from_local_script

---
 google/cloud/aiplatform/jobs.py            | 11 +++++++----
 tests/system/aiplatform/test_custom_job.py |  2 +-
 tests/unit/aiplatform/test_custom_job.py   | 11 +++++++----
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 4b4366f8de..bfd836492b 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1415,10 +1415,13 @@ def from_local_script(
                 command = [
                     "sh",
                     "-c",
-                    "\npip3 install -q --upgrade --no-warn-script-location gsutil"
-                    + f"\ngsutil -q cp {package_gcs_uri} ."
-                    + f"\npip3 install -q --user {package_gcs_uri[len(staging_bucket)+1:]}"
-                    + f"\npython3 -m {python_packager.module_name}",
+                    "DEBIAN_FRONTEND=noninteractive pip install --upgrade pip && "
+                    + 'echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && '
+                    + "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && "
+                    + "apt-get --allow-releaseinfo-change update -y && apt-get install -y google-cloud-sdk && "
+                    + f"gsutil -q cp {package_gcs_uri} script.tar.gz && "
+                    + "pip3 install -q --user script.tar.gz && "
+                    + f"python3 -m {python_packager.module_name}",
                 ]
 
                 spec["container_spec"] = {
diff --git a/tests/system/aiplatform/test_custom_job.py b/tests/system/aiplatform/test_custom_job.py
index 34664902f7..69f6900287 100644
--- a/tests/system/aiplatform/test_custom_job.py
+++ b/tests/system/aiplatform/test_custom_job.py
@@ -24,7 +24,7 @@
 from tests.system.aiplatform import e2e_base
 
 _PREBUILT_CONTAINER_IMAGE = "gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest"
-_CUSTOM_CONTAINER_IMAGE = "gcr.io/deeplearning-platform-release/base-cpu"
+_CUSTOM_CONTAINER_IMAGE = "python:3.8"
 
 _DIR_NAME = os.path.dirname(os.path.abspath(__file__))
 _LOCAL_TRAINING_SCRIPT_PATH = os.path.join(
diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py
index 8dc7572d64..751973269c 100644
--- a/tests/unit/aiplatform/test_custom_job.py
+++ b/tests/unit/aiplatform/test_custom_job.py
@@ -82,10 +82,13 @@
     command=[
         "sh",
         "-c",
-        "\npip3 install -q --user --upgrade --no-warn-script-location gsutil"
-        + f"\ngsutil -q cp {test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH} ."
-        + "\npip3 install -q --user trainer.tar.gz"
-        + f"\npython3 -m {test_training_jobs._TEST_MODULE_NAME}",
+        "DEBIAN_FRONTEND=noninteractive pip install --upgrade pip && "
+        + 'echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && '
+        + "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && "
+        + "apt-get --allow-releaseinfo-change update -y && apt-get install -y google-cloud-sdk && "
+        + f"gsutil -q cp {test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH} script.tar.gz && "
+        + "pip3 install -q --user script.tar.gz && "
+        + f"python3 -m {test_training_jobs._TEST_MODULE_NAME}",
     ],
 )
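Note: PATCH 6 trades the pip-installed gsutil for a full Cloud SDK install through apt, so the bootstrap also works in bare images such as python:3.8 that ship without gsutil, at the cost of a slower container startup. Joined together, the sh -c payload amounts to one &&-chained shell line, roughly as below (URI and module are assumed examples; the apt repo and signing-key setup is elided here but spelled out in the diff above):

    payload = (
        "DEBIAN_FRONTEND=noninteractive pip install --upgrade pip && "
        "<add Google Cloud apt repo and signing key> && "
        "apt-get --allow-releaseinfo-change update -y && apt-get install -y google-cloud-sdk && "
        "gsutil -q cp gs://my-bucket/trainer.tar.gz script.tar.gz && "
        "pip3 install -q --user script.tar.gz && "
        "python3 -m trainer.task"
    )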
From ffc7aa09901d587e568499a7efa73cdba38b59f9 Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Tue, 19 Jul 2022 13:10:50 -0700
Subject: [PATCH 7/8] fix: from_local_script

---
 google/cloud/aiplatform/jobs.py          | 10 ++++------
 tests/unit/aiplatform/test_custom_job.py | 10 ++++------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index bfd836492b..73929834ae 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1415,12 +1415,10 @@ def from_local_script(
                 command = [
                     "sh",
                     "-c",
-                    "DEBIAN_FRONTEND=noninteractive pip install --upgrade pip && "
-                    + 'echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && '
-                    + "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && "
-                    + "apt-get --allow-releaseinfo-change update -y && apt-get install -y google-cloud-sdk && "
-                    + f"gsutil -q cp {package_gcs_uri} script.tar.gz && "
-                    + "pip3 install -q --user script.tar.gz && "
+                    "pip install --upgrade pip && "
+                    + f"pip3 install -q --user {package_gcs_uri} && ".replace(
+                        "gs://", "/gcs/"
+                    )
                     + f"python3 -m {python_packager.module_name}",
                 ]
 
diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py
index 751973269c..bbb8f70a52 100644
--- a/tests/unit/aiplatform/test_custom_job.py
+++ b/tests/unit/aiplatform/test_custom_job.py
@@ -82,12 +82,10 @@
     command=[
         "sh",
         "-c",
-        "DEBIAN_FRONTEND=noninteractive pip install --upgrade pip && "
-        + 'echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && '
-        + "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && "
-        + "apt-get --allow-releaseinfo-change update -y && apt-get install -y google-cloud-sdk && "
-        + f"gsutil -q cp {test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH} script.tar.gz && "
-        + "pip3 install -q --user script.tar.gz && "
+        "pip install --upgrade pip && "
+        + f"pip3 install -q --user {test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH} && ".replace(
+            "gs://", "/gcs/"
+        )
         + f"python3 -m {test_training_jobs._TEST_MODULE_NAME}",
     ],
 )
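Note: PATCH 7 drops gsutil entirely and relies on the Cloud Storage FUSE mount that Vertex AI training exposes inside containers, where an object at gs://<bucket>/<path> is also readable at /gcs/<bucket>/<path>. The .replace("gs://", "/gcs/") rewrites the URI into that local path, so pip can install the package directly with no network setup beforehand. A sketch with an assumed example value:

    package_gcs_uri = "gs://my-bucket/trainer.tar.gz"
    step = f"pip3 install -q --user {package_gcs_uri} && ".replace("gs://", "/gcs/")
    assert step == "pip3 install -q --user /gcs/my-bucket/trainer.tar.gz && "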
From a42fc76615b705220c7148c20e9a9d8cfcbfe673 Mon Sep 17 00:00:00 2001
From: jaycee-li
Date: Wed, 20 Jul 2022 09:09:17 -0700
Subject: [PATCH 8/8] fix: system test

---
 tests/system/aiplatform/test_custom_job.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/system/aiplatform/test_custom_job.py b/tests/system/aiplatform/test_custom_job.py
index 69f6900287..4714773901 100644
--- a/tests/system/aiplatform/test_custom_job.py
+++ b/tests/system/aiplatform/test_custom_job.py
@@ -62,7 +62,6 @@ def test_from_local_script_prebuilt_container(self, shared_state):
         assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED
 
     def test_from_local_script_custom_container(self, shared_state):
-        shared_state["resources"] = []
 
         aiplatform.init(
             project=e2e_base._PROJECT,