1010# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111# ANY KIND, either express or implied. See the License for the specific
1212# language governing permissions and limitations under the License.
13- """Placeholder docstring"""
13+ """Placeholder docstring. """
1414from __future__ import print_function , absolute_import
1515
1616from abc import ABCMeta , abstractmethod
2222
2323from sagemaker .session import Session
2424from sagemaker .utils import DeferredError
25+ from sagemaker .lineage import artifact
2526
2627logger = logging .getLogger (__name__ )
2728
3637
3738
3839class AnalyticsMetricsBase (with_metaclass (ABCMeta , object )):
39- """Base class for tuning job or training job analytics classes. Understands
40- common functionality like persistence and caching.
40+ """Base class for tuning job or training job analytics classes.
41+ Understands common functionality like persistence and caching.
4142 """
4243
4344 def __init__ (self ):
@@ -52,8 +53,8 @@ def export_csv(self, filename):
5253 self .dataframe ().to_csv (filename )
5354
5455 def dataframe (self , force_refresh = False ):
55- """A pandas dataframe with lots of interesting results about this
56- object. Created by calling SageMaker List and Describe APIs and
56+ """A pandas dataframe with lots of interesting results about this object.
57+ Created by calling SageMaker List and Describe APIs and
5758 converting them into a convenient tabular summary.
5859
5960 Args:
@@ -71,17 +72,15 @@ def _fetch_dataframe(self):
7172 """Sub-class must calculate the dataframe and return it."""
7273
7374 def clear_cache (self ):
74- """Clear the object of all local caches of API methods, so that the next
75- time any properties are accessed they will be refreshed from the
75+ """Clear the object of all local caches of API methods.
76+ So that the next time any properties are accessed they will be refreshed from the
7677 service.
7778 """
7879 self ._dataframe = None
7980
8081
8182class HyperparameterTuningJobAnalytics (AnalyticsMetricsBase ):
82- """Fetch results about a hyperparameter tuning job and make them accessible
83- for analytics.
84- """
83+ """Fetch results about a hyperparameter tuning job and make them accessible for analytics."""
8584
8685 def __init__ (self , hyperparameter_tuning_job_name , sagemaker_session = None ):
8786 """Initialize a ``HyperparameterTuningJobAnalytics`` instance.
@@ -104,7 +103,7 @@ def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None):
104103
105104 @property
106105 def name (self ):
107- """Name of the HyperparameterTuningJob being analyzed"""
106+ """Name of the HyperparameterTuningJob being analyzed. """
108107 return self ._tuning_job_name
109108
110109 def __repr__ (self ):
@@ -156,8 +155,8 @@ def reshape(training_summary):
156155
157156 @property
158157 def tuning_ranges (self ):
159- """A dictionary describing the ranges of all tuned hyperparameters. The
160- keys are the names of the hyperparameter, and the values are the ranges.
158+ """A dictionary describing the ranges of all tuned hyperparameters.
159+ The keys are the names of the hyperparameter, and the values are the ranges.
161160
162161 The output can take one of two forms:
163162
@@ -208,16 +207,15 @@ def tuning_ranges(self):
208207 }
209208
210209 def _prepare_parameter_ranges (self , parameter_ranges ):
211- """Convert parameter ranges a dictionary using the parameter range names as the keys"""
210+ """Convert parameter ranges a dictionary using the parameter range names as the keys. """
212211 out = {}
213212 for _ , ranges in parameter_ranges .items ():
214213 for param in ranges :
215214 out [param ["Name" ]] = param
216215 return out
217216
218217 def description (self , force_refresh = False ):
219- """Call ``DescribeHyperParameterTuningJob`` for the hyperparameter
220- tuning job.
218+ """Call ``DescribeHyperParameterTuningJob`` for the hyperparameter tuning job.
221219
222220 Args:
223221 force_refresh (bool): Set to True to fetch the latest data from
@@ -236,8 +234,7 @@ def description(self, force_refresh=False):
236234 return self ._tuning_job_describe_result
237235
238236 def training_job_summaries (self , force_refresh = False ):
239- """A (paginated) list of everything from
240- ``ListTrainingJobsForTuningJob``.
237+ """A (paginated) list of everything from ``ListTrainingJobsForTuningJob``.
241238
242239 Args:
243240 force_refresh (bool): Set to True to fetch the latest data from
@@ -270,9 +267,7 @@ def training_job_summaries(self, force_refresh=False):
270267
271268
272269class TrainingJobAnalytics (AnalyticsMetricsBase ):
273- """Fetch training curve data from CloudWatch Metrics for a specific training
274- job.
275- """
270+ """Fetch training curve data from CloudWatch Metrics for a specific training job."""
276271
277272 CLOUDWATCH_NAMESPACE = "/aws/sagemaker/TrainingJobs"
278273
@@ -318,7 +313,7 @@ def __init__(
318313
319314 @property
320315 def name (self ):
321- """Name of the TrainingJob being analyzed"""
316+ """Name of the TrainingJob being analyzed. """
322317 return self ._training_job_name
323318
324319 def __repr__ (self ):
@@ -365,7 +360,7 @@ def _fetch_dataframe(self):
365360 return pd .DataFrame (self ._data )
366361
367362 def _fetch_metric (self , metric_name ):
368- """Fetch all the values of a named metric, and add them to _data
363+ """Fetch all the values of a named metric, and add them to _data.
369364
370365 Args:
371366 metric_name: The metric name to fetch.
@@ -425,6 +420,75 @@ def _metric_names_for_training_job(self):
425420 return metric_names
426421
427422
class ArtifactAnalytics(AnalyticsMetricsBase):
    """Fetch artifact data and expose it as a pandas dataframe for analytics."""

    def __init__(
        self,
        sort_by=None,
        sort_order=None,
        source_uri=None,
        artifact_type=None,
        sagemaker_session=None,
    ):
        """Initialize an ``ArtifactAnalytics`` instance.

        Args:
            sort_by (str, optional): The name of the resource property used to
                sort the set of artifacts. Only ``"Name"`` is honored; any
                other value is silently replaced with ``None``.
            sort_order (str, optional): How the artifacts are ordered, valid
                values are Ascending and Descending. The default is Descending.
            source_uri (optional): The artifact source uri for filtering.
                Passed through unchanged to ``Artifact.list``.
            artifact_type (optional): The artifact type for filtering.
                Passed through unchanged to ``Artifact.list``.
            sagemaker_session (obj, optional): Sagemaker session. Defaults to None.
        """
        # Sorting is only supported by "Name"; anything else is dropped
        # rather than forwarded to the list call.
        if sort_by == "Name":
            self._sort_by = sort_by
        else:
            self._sort_by = None
        self._sort_order = sort_order
        self._source_uri = source_uri
        self._artifact_type = artifact_type
        self._sagemaker_session = sagemaker_session
        # The filter/sort attributes are set before the base initializer runs,
        # and the cache is reset explicitly afterwards.
        super(ArtifactAnalytics, self).__init__()
        self.clear_cache()

    def __repr__(self):
        """Return a fixed, human-readable tag identifying this analytics class."""
        return "<sagemaker.ArtifactAnalytics>"

    def _reshape_source_type(self, artifact_source_types):
        """Reshape artifact source types into a single-column mapping.

        Args:
            artifact_source_types: Iterable of artifact source types.

        Returns:
            OrderedDict: Empty when the iterable is empty; otherwise a single
            ``"ArtifactSourceType"`` entry.
        """
        reshaped = OrderedDict()
        # NOTE(review): every iteration writes the same key, so only the last
        # source type survives - confirm this is intended.
        for source_type in artifact_source_types:
            reshaped["ArtifactSourceType"] = source_type
        return reshaped

    def _reshape(self, artifact_summary):
        """Flatten one artifact summary into an ordered column -> value mapping.

        Args:
            artifact_summary: Object exposing ``artifact_name``,
                ``artifact_arn``, ``artifact_type``, ``source.source_uri``,
                ``creation_time`` and ``last_modified_time``.

        Returns:
            OrderedDict: One dataframe row, with columns in a fixed order.
        """
        return OrderedDict(
            [
                ("ArtifactName", artifact_summary.artifact_name),
                ("ArtifactArn", artifact_summary.artifact_arn),
                ("ArtifactType", artifact_summary.artifact_type),
                ("ArtifactSourceUri", artifact_summary.source.source_uri),
                ("CreationTime", artifact_summary.creation_time),
                ("LastModifiedTime", artifact_summary.last_modified_time),
            ]
        )

    def _fetch_dataframe(self):
        """Return a pandas dataframe with one row per listed artifact."""
        rows = [self._reshape(summary) for summary in self._get_list_artifacts()]
        return pd.DataFrame(rows)

    def _get_list_artifacts(self):
        """List artifacts using the filters and sort settings stored on this instance."""
        return artifact.Artifact.list(
            source_uri=self._source_uri,
            artifact_type=self._artifact_type,
            sort_by=self._sort_by,
            sort_order=self._sort_order,
            sagemaker_session=self._sagemaker_session,
        )
491+
428492class ExperimentAnalytics (AnalyticsMetricsBase ):
429493 """Fetch trial component data and make them accessible for analytics."""
430494
@@ -486,7 +550,7 @@ def __init__(
486550
487551 @property
488552 def name (self ):
489- """Name of the Experiment being analyzed"""
553+ """Name of the Experiment being analyzed. """
490554 return self ._experiment_name
491555
492556 def __repr__ (self ):
@@ -499,7 +563,7 @@ def clear_cache(self):
499563 self ._trial_components = None
500564
501565 def _reshape_parameters (self , parameters ):
502- """Reshape trial component parameters to a pandas column
566+ """Reshape trial component parameters to a pandas column.
503567 Args:
504568 parameters: trial component parameters
505569 Returns:
@@ -513,7 +577,7 @@ def _reshape_parameters(self, parameters):
513577 return out
514578
515579 def _reshape_metrics (self , metrics ):
516- """Reshape trial component metrics to a pandas column
580+ """Reshape trial component metrics to a pandas column.
517581 Args:
518582 metrics: trial component metrics
519583 Returns:
@@ -533,7 +597,7 @@ def _reshape_metrics(self, metrics):
533597 return out
534598
535599 def _reshape_artifacts (self , artifacts , _artifact_names ):
536- """Reshape trial component input/output artifacts to a pandas column
600+ """Reshape trial component input/output artifacts to a pandas column.
537601 Args:
538602 artifacts: trial component input/output artifacts
539603 Returns:
@@ -548,7 +612,8 @@ def _reshape_artifacts(self, artifacts, _artifact_names):
548612 return out
549613
550614 def _reshape_parents (self , parents ):
551- """Reshape trial component parents to a pandas column
615+ """Reshape trial component parents to a pandas column.
616+
552617 Args:
553618 parents: trial component parents (trials and experiments)
554619 Returns:
@@ -565,7 +630,7 @@ def _reshape_parents(self, parents):
565630 return out
566631
567632 def _reshape (self , trial_component ):
568- """Reshape trial component data to pandas columns
633+ """Reshape trial component data to pandas columns.
569634 Args:
570635 trial_component: dict representing a trial component
571636 Returns:
@@ -633,8 +698,7 @@ def _get_trial_components(self, force_refresh=False):
633698 return self ._search (self ._search_expression , self ._sort_by , self ._sort_order )
634699
635700 def _search (self , search_expression , sort_by , sort_order ):
636- """
637- Perform a search query using SageMaker Search and return the matching trial components
701+ """Perform a search query using SageMaker Search and return the matching trial components.
638702
639703 Args:
640704 search_expression: Search expression to filter trial components.
0 commit comments