diff --git a/tensorflow/training/docker/2.3/py3/Dockerfile.cpu b/tensorflow/training/docker/2.3/py3/Dockerfile.cpu index cf9dbe147ec4..340dc39d57b5 100644 --- a/tensorflow/training/docker/2.3/py3/Dockerfile.cpu +++ b/tensorflow/training/docker/2.3/py3/Dockerfile.cpu @@ -125,7 +125,7 @@ RUN ${PIP} install --no-cache-dir -U \ scipy==1.5.2 \ scikit-learn==0.23 \ pandas==1.1 \ - Pillow==7.2.0 \ + Pillow==8.2.0 \ python-dateutil==2.8.1 \ # install PyYAML>=5.4.1 to avoid conflict with latest awscli "pyYAML>=5.4.1,<5.5" \ diff --git a/tensorflow/training/docker/2.3/py3/cu102/Dockerfile.gpu b/tensorflow/training/docker/2.3/py3/cu102/Dockerfile.gpu index e739491dfdc9..3ece0ccfd9f4 100644 --- a/tensorflow/training/docker/2.3/py3/cu102/Dockerfile.gpu +++ b/tensorflow/training/docker/2.3/py3/cu102/Dockerfile.gpu @@ -174,7 +174,7 @@ RUN ${PIP} install --no-cache-dir -U \ scipy==1.5.2 \ scikit-learn==0.23 \ pandas==1.1 \ - Pillow==7.2.0 \ + Pillow==8.2.0 \ python-dateutil==2.8.1 \ # install PyYAML>=5.4.1 to avoid conflict with latest awscli "pyYAML>=5.4.1,<5.5" \ diff --git a/tensorflow/training/docker/2.3/py3/cu110/Dockerfile.gpu b/tensorflow/training/docker/2.3/py3/cu110/Dockerfile.gpu index fc708bd8ce49..fbd719df3e61 100644 --- a/tensorflow/training/docker/2.3/py3/cu110/Dockerfile.gpu +++ b/tensorflow/training/docker/2.3/py3/cu110/Dockerfile.gpu @@ -182,7 +182,7 @@ RUN ${PIP} install --no-cache-dir -U \ scipy==1.5.2 \ scikit-learn==0.23 \ pandas==1.1 \ - Pillow==7.2.0 \ + Pillow==8.2.0 \ python-dateutil==2.8.1 \ # install PyYAML>=5.4.1 to avoid conflict with latest awscli "pyYAML>=5.4.1,<5.5" \ diff --git a/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_mnist.py b/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_mnist.py index 4a954d529337..fe371c9eeea7 100755 --- a/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_mnist.py +++ b/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_mnist.py @@ -191,15 +191,15 @@ def test_smdebug(sagemaker_session, ecr_image, instance_type, framework_version) @pytest.mark.model("mnist") @pytest.mark.skip_cpu @pytest.mark.skip_py2_containers -def test_smdataparallel_smmodelparallel_mnist(sagemaker_session, instance_type, ecr_image, tmpdir, framework_version): +def test_smdataparallel_smmodelparallel_mnist(n_virginia_sagemaker_session, instance_type, n_virginia_ecr_image, tmpdir, framework_version): """ Tests SM Distributed DataParallel and ModelParallel single-node via script mode This test has been added for SM DataParallelism and ModelParallelism tests for re:invent. TODO: Consider reworking these tests after re:Invent releases are done """ instance_type = "ml.p3.16xlarge" - _, image_framework_version = get_framework_and_version_from_tag(ecr_image) - image_cuda_version = get_cuda_version_from_tag(ecr_image) + _, image_framework_version = get_framework_and_version_from_tag(n_virginia_ecr_image) + image_cuda_version = get_cuda_version_from_tag(n_virginia_ecr_image) if Version(image_framework_version) < Version("2.3.1") or image_cuda_version != "cu110": pytest.skip("SMD Model and Data Parallelism are only supported on CUDA 11, and on TensorFlow 2.3.1 or higher") smmodelparallel_path = os.path.join(RESOURCE_PATH, 'smmodelparallel') @@ -209,12 +209,12 @@ def test_smdataparallel_smmodelparallel_mnist(sagemaker_session, instance_type, instance_count=1, instance_type=instance_type, source_dir=smmodelparallel_path, - sagemaker_session=sagemaker_session, - image_uri=ecr_image, + sagemaker_session=n_virginia_sagemaker_session, + image_uri=n_virginia_ecr_image, framework_version=framework_version, py_version='py3') - estimator = _disable_sm_profiler(sagemaker_session.boto_region_name, estimator) + estimator = _disable_sm_profiler(n_virginia_sagemaker_session.boto_region_name, estimator) estimator.fit() diff --git a/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smdataparallel.py b/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smdataparallel.py index 404e9674f1c9..71c97acd110d 100644 --- a/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smdataparallel.py +++ b/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smdataparallel.py @@ -58,12 +58,12 @@ def can_run_smdataparallel_efa(ecr_image): @pytest.mark.skip_cpu @pytest.mark.skip_py2_containers def test_distributed_training_smdataparallel_script_mode( - sagemaker_session, instance_type, ecr_image, tmpdir, framework_version + n_virginia_sagemaker_session, instance_type, n_virginia_ecr_image, tmpdir, framework_version ): """ Tests SMDataParallel single-node command via script mode """ - validate_or_skip_smdataparallel(ecr_image) + validate_or_skip_smdataparallel(n_virginia_ecr_image) instance_type = "ml.p3.16xlarge" distribution = {"smdistributed": {"dataparallel": {"enabled": True}}} estimator = TensorFlow( @@ -72,10 +72,10 @@ def test_distributed_training_smdataparallel_script_mode( role='SageMakerRole', instance_type=instance_type, instance_count=1, - image_uri=ecr_image, + image_uri=n_virginia_ecr_image, framework_version=framework_version, py_version='py3', - sagemaker_session=sagemaker_session, + sagemaker_session=n_virginia_sagemaker_session, distribution=distribution) estimator.fit(job_name=unique_name_from_base('test-tf-smdataparallel')) diff --git a/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smmodelparallel.py b/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smmodelparallel.py index fe275168091e..f089684a50d6 100644 --- a/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smmodelparallel.py +++ b/test/sagemaker_tests/tensorflow/tensorflow2_training/integration/sagemaker/test_smmodelparallel.py @@ -122,12 +122,12 @@ def test_smmodelparallel_multinode_efa(n_virginia_sagemaker_session, efa_instanc @pytest.mark.skip_cpu @pytest.mark.skip_py2_containers @pytest.mark.parametrize("test_script, num_processes", [("tf2_conv.py", 2), ("tf2_conv_xla.py", 2), ("smmodelparallel_hvd2_conv.py", 4), ("send_receive_checkpoint.py", 2), ("tf2_checkpoint_test.py", 2)]) -def test_smmodelparallel(sagemaker_session, instance_type, ecr_image, tmpdir, framework_version, test_script, num_processes): +def test_smmodelparallel(n_virginia_sagemaker_session, instance_type, n_virginia_ecr_image, tmpdir, framework_version, test_script, num_processes): """ Tests SM Modelparallel in sagemaker """ instance_type = "ml.p3.16xlarge" - validate_or_skip_smmodelparallel(ecr_image) + validate_or_skip_smmodelparallel(n_virginia_ecr_image) smmodelparallel_path = os.path.join(RESOURCE_PATH, 'smmodelparallel') estimator = TensorFlow(entry_point=test_script, role='SageMakerRole', @@ -141,8 +141,8 @@ def test_smmodelparallel(sagemaker_session, instance_type, ecr_image, tmpdir, fr "custom_mpi_options": "-verbose --mca orte_base_help_aggregate 0 -x RDMAV_FORK_SAFE=1 ", } }, - sagemaker_session=sagemaker_session, - image_uri=ecr_image, + sagemaker_session=n_virginia_sagemaker_session, + image_uri=n_virginia_ecr_image, framework_version=framework_version, py_version='py3', base_job_name='smp-test1') @@ -156,12 +156,12 @@ def test_smmodelparallel(sagemaker_session, instance_type, ecr_image, tmpdir, fr @pytest.mark.skip_cpu @pytest.mark.skip_py2_containers @pytest.mark.parametrize("test_script, num_processes", [("smmodelparallel_hvd2_conv_multinode.py", 2)]) -def test_smmodelparallel_multinode(sagemaker_session, instance_type, ecr_image, tmpdir, framework_version, test_script, num_processes): +def test_smmodelparallel_multinode(n_virginia_sagemaker_session, instance_type, n_virginia_ecr_image, tmpdir, framework_version, test_script, num_processes): """ Tests SM Modelparallel in sagemaker """ instance_type = "ml.p3.16xlarge" - validate_or_skip_smmodelparallel(ecr_image) + validate_or_skip_smmodelparallel(n_virginia_ecr_image) smmodelparallel_path = os.path.join(RESOURCE_PATH, 'smmodelparallel') estimator = TensorFlow(entry_point=test_script, role='SageMakerRole', @@ -175,8 +175,8 @@ def test_smmodelparallel_multinode(sagemaker_session, instance_type, ecr_image, "custom_mpi_options": "-verbose --mca orte_base_help_aggregate 0 -x RDMAV_FORK_SAFE=1 ", } }, - sagemaker_session=sagemaker_session, - image_uri=ecr_image, + sagemaker_session=n_virginia_sagemaker_session, + image_uri=n_virginia_ecr_image, framework_version=framework_version, py_version='py3', base_job_name='smp-test2')