From 1ec8e89968eb5a4b213d6490f6fd1cabd4a4040f Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 26 Jun 2018 15:13:07 -0700 Subject: [PATCH 1/2] Fix docs about s3_input usage --- src/sagemaker/session.py | 2 +- src/sagemaker/tensorflow/README.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 62bfdca63b..2a945c9a49 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -890,7 +890,7 @@ def __init__(self, s3_data, distribution='FullyReplicated', compression=None, s3_data (str): Defines the location of s3 data to train on. distribution (str): Valid values: 'FullyReplicated', 'ShardedByS3Key' (default: 'FullyReplicated'). - compression (str): Valid values: 'Gzip', 'Bzip2', 'Lzop' (default: None). + compression (str): Valid values: 'Gzip', None (default: None). This is used only in Pipe input mode. content_type (str): MIME type of the input data (default: None). record_wrapping (str): Valid values: 'RecordIO' (default: None). s3_data_type (str): Value values: 'S3Prefix', 'ManifestFile'. If 'S3Prefix', ``s3_data`` defines diff --git a/src/sagemaker/tensorflow/README.rst b/src/sagemaker/tensorflow/README.rst index 2e21468622..89e6890c51 100644 --- a/src/sagemaker/tensorflow/README.rst +++ b/src/sagemaker/tensorflow/README.rst @@ -482,6 +482,7 @@ Required argument - ``str``: An S3 URI, for example ``s3://my-bucket/my-training-data``, which indicates the dataset's location. - ``dict[str, str]``: A dictionary mapping channel names to S3 locations, for example ``{'train': 's3://my-bucket/my-training-data/train', 'test': 's3://my-bucket/my-training-data/test'}`` + - ``sagemaker.session.s3_input``: channel configuration for S3 data sources that can provide additional information about the training dataset. See `the API docs <https://sagemaker.readthedocs.io/en/latest/session.html#sagemaker.session.s3_input>`_ for full details. When the training job starts in SageMaker the container will download the dataset. 
Both ``train_input_fn`` and ``eval_input_fn`` functions have a parameter called ``training_dir`` which From 6949684c5c1bf9b2591847b69bd23606d90e7927 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 26 Jun 2018 15:23:32 -0700 Subject: [PATCH 2/2] Fix wording --- src/sagemaker/estimator.py | 4 ++-- src/sagemaker/tensorflow/README.rst | 2 +- src/sagemaker/tensorflow/estimator.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 53005dc44c..c7b03099e7 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -165,8 +165,8 @@ def fit(self, inputs, wait=True, logs=True, job_name=None): training data, you can specify a dict mapping channel names to strings or :func:`~sagemaker.session.s3_input` objects. * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. + additional information as well as the path to the training dataset. + See :func:`sagemaker.session.s3_input` for full details. wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True). diff --git a/src/sagemaker/tensorflow/README.rst b/src/sagemaker/tensorflow/README.rst index 89e6890c51..3c6154838e 100644 --- a/src/sagemaker/tensorflow/README.rst +++ b/src/sagemaker/tensorflow/README.rst @@ -482,7 +482,7 @@ Required argument - ``str``: An S3 URI, for example ``s3://my-bucket/my-training-data``, which indicates the dataset's location. 
- ``dict[str, str]``: A dictionary mapping channel names to S3 locations, for example ``{'train': 's3://my-bucket/my-training-data/train', 'test': 's3://my-bucket/my-training-data/test'}`` - - ``sagemaker.session.s3_input``: channel configuration for S3 data sources that can provide additional information about the training dataset. See `the API docs <https://sagemaker.readthedocs.io/en/latest/session.html#sagemaker.session.s3_input>`_ for full details. + - ``sagemaker.session.s3_input``: channel configuration for S3 data sources that can provide additional information as well as the path to the training dataset. See `the API docs <https://sagemaker.readthedocs.io/en/latest/session.html#sagemaker.session.s3_input>`_ for full details. When the training job starts in SageMaker the container will download the dataset. Both ``train_input_fn`` and ``eval_input_fn`` functions have a parameter called ``training_dir`` which diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 26149a3e30..88254e6d31 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -216,8 +216,8 @@ def fit(self, inputs, wait=True, logs=True, job_name=None, run_tensorboard_local training data, you can specify a dict mapping channel names to strings or :func:`~sagemaker.session.s3_input` objects. (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. + additional information as well as the path to the training dataset. + See :func:`sagemaker.session.s3_input` for full details. wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True).