From d477a33e4b3e097aca9cf837596e2374a96d194c Mon Sep 17 00:00:00 2001
From: Andrew Packer <apacker1@gmail.com>
Date: Tue, 28 Aug 2018 18:20:04 -0700
Subject: [PATCH 1/4] Add volume KMS key to transformer

This adds support for the optional VolumeKmsKeyId parameter of CreateTuningJob
---
 CHANGELOG.rst                  |  1 +
 src/sagemaker/transformer.py   | 13 +++++++++----
 tests/unit/test_transformer.py | 13 +++++++++----
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2c04fe4e85..b58d9d6fe6 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,6 +11,7 @@ CHANGELOG
 =====
 
 * feature: add support for TensorFlow 1.9
+* enhancement: Add support for volume KMS key to Transformer
 
 1.9.1
 =====
diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py
index 2d707eaa54..4504aed2db 100644
--- a/src/sagemaker/transformer.py
+++ b/src/sagemaker/transformer.py
@@ -23,7 +23,7 @@ class Transformer(object):
 
     def __init__(self, model_name, instance_count, instance_type, strategy=None, assemble_with=None, output_path=None,
                  output_kms_key=None, accept=None, max_concurrent_transforms=None, max_payload=None, tags=None,
-                 env=None, base_transform_job_name=None, sagemaker_session=None):
+                 env=None, base_transform_job_name=None, sagemaker_session=None, volume_kms_key=None):
         """Initialize a ``Transformer``.
 
         Args:
@@ -50,6 +50,8 @@ def __init__(self, model_name, instance_count, instance_type, strategy=None, ass
             sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
                 Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one
                 using the default AWS configuration chain.
+            volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML
+                compute instance (default: None).
         """
         self.model_name = model_name
         self.strategy = strategy
@@ -62,6 +64,7 @@ def __init__(self, model_name, instance_count, instance_type, strategy=None, ass
 
         self.instance_count = instance_count
         self.instance_type = instance_type
+        self.volume_kms_key = volume_kms_key
 
         self.max_concurrent_transforms = max_concurrent_transforms
         self.max_payload = max_payload
@@ -159,6 +162,7 @@ def _prepare_init_params_from_job_description(cls, job_details):
         init_params['model_name'] = job_details['ModelName']
         init_params['instance_count'] = job_details['TransformResources']['InstanceCount']
         init_params['instance_type'] = job_details['TransformResources']['InstanceType']
+        init_params['volume_kms_key'] = job_details['TransformResources'].get('VolumeKmsKeyId')
         init_params['strategy'] = job_details.get('BatchStrategy')
         init_params['assemble_with'] = job_details['TransformOutput'].get('AssembleWith')
         init_params['output_path'] = job_details['TransformOutput']['S3OutputPath']
@@ -200,7 +204,8 @@ def _load_config(data, data_type, content_type, compression_type, split_type, tr
         output_config = _TransformJob._prepare_output_config(transformer.output_path, transformer.output_kms_key,
                                                              transformer.assemble_with, transformer.accept)
 
-        resource_config = _TransformJob._prepare_resource_config(transformer.instance_count, transformer.instance_type)
+        resource_config = _TransformJob._prepare_resource_config(transformer.instance_count, transformer.instance_type,
+                                                                 transformer.volume_kms_key)
 
         return {'input_config': input_config,
                 'output_config': output_config,
@@ -241,5 +246,5 @@ def _prepare_output_config(s3_path, kms_key_id, assemble_with, accept):
         return config
 
     @staticmethod
-    def _prepare_resource_config(instance_count, instance_type):
-        return {'InstanceCount': instance_count, 'InstanceType': instance_type}
+    def _prepare_resource_config(instance_count, instance_type, volume_kms_key):
+        return {'InstanceCount': instance_count, 'InstanceType': instance_type, 'VolumeKmsKeyId': volume_kms_key}
diff --git a/tests/unit/test_transformer.py b/tests/unit/test_transformer.py
index 006970fd39..200d5df52d 100644
--- a/tests/unit/test_transformer.py
+++ b/tests/unit/test_transformer.py
@@ -23,6 +23,7 @@
 
 INSTANCE_COUNT = 1
 INSTANCE_TYPE = 'ml.m4.xlarge'
+KMS_KEY_ID = 'kms-key-id'
 
 S3_DATA_TYPE = 'S3Prefix'
 S3_BUCKET = 'bucket'
@@ -48,7 +49,8 @@ def sagemaker_session():
 @pytest.fixture()
 def transformer(sagemaker_session):
     return Transformer(MODEL_NAME, INSTANCE_COUNT, INSTANCE_TYPE,
-                       output_path=OUTPUT_PATH, sagemaker_session=sagemaker_session)
+                       output_path=OUTPUT_PATH, sagemaker_session=sagemaker_session,
+                       volume_kms_key=KMS_KEY_ID)
 
 
 @patch('sagemaker.transformer._TransformJob.start_new')
@@ -178,7 +180,8 @@ def test_prepare_init_params_from_job_description_all_keys(transformer):
         'ModelName': MODEL_NAME,
         'TransformResources': {
             'InstanceCount': INSTANCE_COUNT,
-            'InstanceType': INSTANCE_TYPE
+            'InstanceType': INSTANCE_TYPE,
+            'VolumeKmsKeyId': KMS_KEY_ID
         },
         'BatchStrategy': None,
         'TransformOutput': {
@@ -197,6 +200,7 @@ def test_prepare_init_params_from_job_description_all_keys(transformer):
     assert init_params['model_name'] == MODEL_NAME
     assert init_params['instance_count'] == INSTANCE_COUNT
     assert init_params['instance_type'] == INSTANCE_TYPE
+    assert init_params['volume_kms_key'] == KMS_KEY_ID
 
 
 # _TransformJob tests
@@ -227,6 +231,7 @@ def test_load_config(transformer):
         'resource_config': {
             'InstanceCount': INSTANCE_COUNT,
             'InstanceType': INSTANCE_TYPE,
+            'VolumeKmsKeyId': KMS_KEY_ID,
         },
     }
 
@@ -292,8 +297,8 @@ def test_prepare_output_config_with_optional_params():
 
 
 def test_prepare_resource_config():
-    config = _TransformJob._prepare_resource_config(INSTANCE_COUNT, INSTANCE_TYPE)
-    assert config == {'InstanceCount': INSTANCE_COUNT, 'InstanceType': INSTANCE_TYPE}
+    config = _TransformJob._prepare_resource_config(INSTANCE_COUNT, INSTANCE_TYPE, KMS_KEY_ID)
+    assert config == {'InstanceCount': INSTANCE_COUNT, 'InstanceType': INSTANCE_TYPE, 'VolumeKmsKeyId': KMS_KEY_ID}
 
 
 def test_transform_job_wait(sagemaker_session):

From 04cb7834ee11b365192a0f3530e691877bf0e3df Mon Sep 17 00:00:00 2001
From: Andrew Packer <apacker@amazon.com>
Date: Wed, 29 Aug 2018 13:39:06 -0700
Subject: [PATCH 2/4] Fix changelog and add integ test

This also adds volume KMS key to Estimator.transformer()
---
 CHANGELOG.rst                   |  2 +-
 src/sagemaker/estimator.py      | 12 +++--
 tests/integ/kms_utils.py        | 89 +++++++++++++++++++++++++++++++++
 tests/integ/test_transformer.py | 16 ++++--
 tests/unit/test_estimator.py    |  3 +-
 5 files changed, 112 insertions(+), 10 deletions(-)
 create mode 100644 tests/integ/kms_utils.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b58d9d6fe6..91ae976d43 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,12 +6,12 @@ CHANGELOG
 ========
 
 * bug-fix: Local Mode: Create output/data directory expected by SageMaker Container.
+* enhancement: Add support for volume KMS key to Transformer
 
 1.9.2
 =====
 
 * feature: add support for TensorFlow 1.9
-* enhancement: Add support for volume KMS key to Transformer
 
 1.9.1
 =====
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 9febff5240..d4633aa633 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -328,7 +328,7 @@ def delete_endpoint(self):
 
     def transformer(self, instance_count, instance_type, strategy=None, assemble_with=None, output_path=None,
                     output_kms_key=None, accept=None, env=None, max_concurrent_transforms=None,
-                    max_payload=None, tags=None, role=None):
+                    max_payload=None, tags=None, role=None, volume_kms_key=None):
         """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the
         SageMaker Session and base job name used by the Estimator.
 
@@ -350,6 +350,8 @@ def transformer(self, instance_count, instance_type, strategy=None, assemble_wit
                 the training job are used for the transform job.
             role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
                 transform jobs. If not specified, the role from the Estimator will be used.
+            volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML
+                compute instance (default: None).
         """
         self._ensure_latest_training_job()
 
@@ -360,7 +362,7 @@ def transformer(self, instance_count, instance_type, strategy=None, assemble_wit
                            output_path=output_path, output_kms_key=output_kms_key, accept=accept,
                            max_concurrent_transforms=max_concurrent_transforms, max_payload=max_payload,
                            env=env, tags=tags, base_transform_job_name=self.base_job_name,
-                           sagemaker_session=self.sagemaker_session)
+                           volume_kms_key=volume_kms_key, sagemaker_session=self.sagemaker_session)
 
     @property
     def training_job_analytics(self):
@@ -756,7 +758,7 @@ def _update_init_params(cls, hp, tf_arguments):
 
     def transformer(self, instance_count, instance_type, strategy=None, assemble_with=None, output_path=None,
                     output_kms_key=None, accept=None, env=None, max_concurrent_transforms=None,
-                    max_payload=None, tags=None, role=None, model_server_workers=None):
+                    max_payload=None, tags=None, role=None, model_server_workers=None, volume_kms_key=None):
         """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the
         SageMaker Session and base job name used by the Estimator.
 
@@ -780,6 +782,8 @@ def transformer(self, instance_count, instance_type, strategy=None, assemble_wit
                 transform jobs. If not specified, the role from the Estimator will be used.
             model_server_workers (int): Optional. The number of worker processes used by the inference server.
                 If None, server will use one worker per vCPU.
+            volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML
+                compute instance (default: None).
         """
         self._ensure_latest_training_job()
         role = role or self.role
@@ -799,7 +803,7 @@ def transformer(self, instance_count, instance_type, strategy=None, assemble_wit
                            output_path=output_path, output_kms_key=output_kms_key, accept=accept,
                            max_concurrent_transforms=max_concurrent_transforms, max_payload=max_payload,
                            env=transform_env, tags=tags, base_transform_job_name=self.base_job_name,
-                           sagemaker_session=self.sagemaker_session)
+                           volume_kms_key=volume_kms_key, sagemaker_session=self.sagemaker_session)
 
 
 def _s3_uri_prefix(channel_name, s3_data):
diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py
new file mode 100644
index 0000000000..f607041669
--- /dev/null
+++ b/tests/integ/kms_utils.py
@@ -0,0 +1,89 @@
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+KEY_ALIAS = "SageMakerKmsKey"
+KEY_POLICY = '''
+{{
+  "Version": "2012-10-17",
+  "Id": "sagemaker-kms-integ-test-policy",
+  "Statement": [
+    {{
+      "Sid": "Enable IAM User Permissions",
+      "Effect": "Allow",
+      "Principal": {{
+        "AWS": "arn:aws:iam::{account_id}:root"
+      }},
+      "Action": "kms:*",
+      "Resource": "*"
+    }},
+    {{
+      "Sid": "Allow use of the key",
+      "Effect": "Allow",
+      "Principal": {{
+        "AWS": "arn:aws:iam::{account_id}:role/SageMakerRole"
+      }},
+      "Action": [
+        "kms:Encrypt",
+        "kms:Decrypt",
+        "kms:ReEncrypt*",
+        "kms:GenerateDataKey*",
+        "kms:DescribeKey"
+      ],
+      "Resource": "*"
+    }},
+    {{
+      "Sid": "Allow attachment of persistent resources",
+      "Effect": "Allow",
+      "Principal": {{
+        "AWS": "arn:aws:iam::{account_id}:role/SageMakerRole"
+      }},
+      "Action": [
+        "kms:CreateGrant",
+        "kms:ListGrants",
+        "kms:RevokeGrant"
+      ],
+      "Resource": "*",
+      "Condition": {{
+        "Bool": {{
+          "kms:GrantIsForAWSResource": "true"
+        }}
+      }}
+    }}
+  ]
+}}
+'''
+
+
+def _get_kms_key_arn(kms_client, alias):
+    try:
+        response = kms_client.describe_key(KeyId='alias/' + alias)
+        return response['KeyMetadata']['Arn']
+    except kms_client.exceptions.NotFoundException:
+        return None
+
+def _create_kms_key(kms_client, account_id):
+    response = kms_client.create_key(
+        Policy=KEY_POLICY.format(account_id=account_id),
+        Description='KMS key for SageMaker Python SDK integ tests',
+    )
+    key_arn = response['KeyMetadata']['Arn']
+    response = kms_client.create_alias(AliasName='alias/' + KEY_ALIAS, TargetKeyId=key_arn)
+    return key_arn
+
+def get_or_create_kms_key(kms_client, account_id):
+    kms_key_arn = _get_kms_key_arn(kms_client, KEY_ALIAS)
+    if kms_key_arn is not None:
+        return kms_key_arn
+    else:
+        return _create_kms_key(kms_client, account_id)
diff --git a/tests/integ/test_transformer.py b/tests/integ/test_transformer.py
index 1b5a5160fd..6f44f7930a 100644
--- a/tests/integ/test_transformer.py
+++ b/tests/integ/test_transformer.py
@@ -23,6 +23,7 @@
 from sagemaker.mxnet import MXNet
 from sagemaker.transformer import Transformer
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
+from tests.integ.kms_utils import get_or_create_kms_key
 from tests.integ.timeout import timeout
 
 
@@ -47,9 +48,16 @@ def test_transform_mxnet(sagemaker_session):
     transform_input = mx.sagemaker_session.upload_data(path=transform_input_path,
                                                        key_prefix=transform_input_key_prefix)
 
-    transformer = _create_transformer_and_transform_job(mx, transform_input)
-    transformer.wait()
+    sts_client = sagemaker_session.boto_session.client('sts')
+    account_id = sts_client.get_caller_identity()['Account']
+    kms_client = sagemaker_session.boto_session.client('kms')
+    kms_key_arn = get_or_create_kms_key(kms_client, account_id)
 
+    transformer = _create_transformer_and_transform_job(mx, transform_input, kms_key_arn)
+    transformer.wait()
+    job_desc = transformer.sagemaker_session.sagemaker_client.describe_transform_job(
+        TransformJobName=transformer.latest_transform_job.name)
+    assert kms_key_arn == job_desc['TransformResources']['VolumeKmsKeyId']
 
 @pytest.mark.continuous_testing
 def test_attach_transform_kmeans(sagemaker_session):
@@ -90,7 +98,7 @@ def test_attach_transform_kmeans(sagemaker_session):
     attached_transformer.wait()
 
 
-def _create_transformer_and_transform_job(estimator, transform_input):
-    transformer = estimator.transformer(1, 'ml.m4.xlarge')
+def _create_transformer_and_transform_job(estimator, transform_input, volume_kms_key=None):
+    transformer = estimator.transformer(1, 'ml.m4.xlarge', volume_kms_key=volume_kms_key)
     transformer.transform(transform_input, content_type='text/csv')
     return transformer
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index d173f013fb..346d553f72 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -488,7 +488,7 @@ def test_framework_transformer_creation_with_optional_params(name_from_image, sa
     transformer = fw.transformer(INSTANCE_COUNT, INSTANCE_TYPE, strategy=strategy, assemble_with=assemble_with,
                                  output_path=OUTPUT_PATH, output_kms_key=kms_key, accept=accept, tags=TAGS,
                                  max_concurrent_transforms=max_concurrent_transforms, max_payload=max_payload,
-                                 env=env, role=new_role, model_server_workers=1)
+                                 volume_kms_key=kms_key, env=env, role=new_role, model_server_workers=1)
 
     sagemaker_session.create_model.assert_called_with(MODEL_IMAGE, new_role, MODEL_CONTAINER_DEF)
     assert transformer.strategy == strategy
@@ -501,6 +501,7 @@ def test_framework_transformer_creation_with_optional_params(name_from_image, sa
     assert transformer.env == env
     assert transformer.base_transform_job_name == base_name
     assert transformer.tags == TAGS
+    assert transformer.volume_kms_key == kms_key
 
 
 def test_ensure_latest_training_job(sagemaker_session):

From fb3773136a59c602ddec4c7bee3a466f30e7fca8 Mon Sep 17 00:00:00 2001
From: Andrew Packer <apacker@amazon.com>
Date: Wed, 29 Aug 2018 14:15:22 -0700
Subject: [PATCH 3/4] Fixing flake errors

---
 tests/integ/kms_utils.py        | 2 ++
 tests/integ/test_transformer.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tests/integ/kms_utils.py b/tests/integ/kms_utils.py
index f607041669..047a585980 100644
--- a/tests/integ/kms_utils.py
+++ b/tests/integ/kms_utils.py
@@ -72,6 +72,7 @@ def _get_kms_key_arn(kms_client, alias):
     except kms_client.exceptions.NotFoundException:
         return None
 
+
 def _create_kms_key(kms_client, account_id):
     response = kms_client.create_key(
         Policy=KEY_POLICY.format(account_id=account_id),
@@ -81,6 +82,7 @@ def _create_kms_key(kms_client, account_id):
     response = kms_client.create_alias(AliasName='alias/' + KEY_ALIAS, TargetKeyId=key_arn)
     return key_arn
 
+
 def get_or_create_kms_key(kms_client, account_id):
     kms_key_arn = _get_kms_key_arn(kms_client, KEY_ALIAS)
     if kms_key_arn is not None:
diff --git a/tests/integ/test_transformer.py b/tests/integ/test_transformer.py
index 6f44f7930a..dc626b23d6 100644
--- a/tests/integ/test_transformer.py
+++ b/tests/integ/test_transformer.py
@@ -59,6 +59,7 @@ def test_transform_mxnet(sagemaker_session):
         TransformJobName=transformer.latest_transform_job.name)
     assert kms_key_arn == job_desc['TransformResources']['VolumeKmsKeyId']
 
+
 @pytest.mark.continuous_testing
 def test_attach_transform_kmeans(sagemaker_session):
     data_path = os.path.join(DATA_DIR, 'one_p_mnist')

From 8ef3bbd2f89b9e232c4fa1ad2fb3d06774888209 Mon Sep 17 00:00:00 2001
From: Andrew Packer <apacker@amazon.com>
Date: Wed, 26 Sep 2018 16:30:45 -0700
Subject: [PATCH 4/4] Fix resource config preparation for volume kms key

---
 src/sagemaker/transformer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py
index 4504aed2db..a9c17afdd0 100644
--- a/src/sagemaker/transformer.py
+++ b/src/sagemaker/transformer.py
@@ -247,4 +247,9 @@ def _prepare_output_config(s3_path, kms_key_id, assemble_with, accept):
 
     @staticmethod
     def _prepare_resource_config(instance_count, instance_type, volume_kms_key):
-        return {'InstanceCount': instance_count, 'InstanceType': instance_type, 'VolumeKmsKeyId': volume_kms_key}
+        config = {'InstanceCount': instance_count, 'InstanceType': instance_type}
+
+        if volume_kms_key is not None:
+            config['VolumeKmsKeyId'] = volume_kms_key
+
+        return config