aws
diff --git a/‎src/sagemaker/image_uris.py‎
Lines changed: 197 additions & 59 deletions b/‎src/sagemaker/image_uris.py‎
Lines changed: 197 additions & 59 deletions
diff --git a/‎src/sagemaker/modules/train/container_drivers/distributed_runner.json‎
Lines changed: 1 addition & 0 deletions b/‎src/sagemaker/modules/train/container_drivers/distributed_runner.json‎
Lines changed: 1 addition & 0 deletions
@@ -17,7 +17,7 @@
 import logging
 import os
 import re
-from typing import Optional
+from typing import Optional, Tuple
 from packaging.version import Version
 
 from sagemaker import utils
@@ -52,28 +52,28 @@
 
 @override_pipeline_parameter_var
 def retrieve(
-    framework,
-    region,
-    version=None,
-    py_version=None,
-    instance_type=None,
-    accelerator_type=None,
-    image_scope=None,
-    container_version=None,
-    distribution=None,
-    base_framework_version=None,
-    training_compiler_config=None,
-    model_id=None,
-    model_version=None,
-    hub_arn=None,
-    tolerate_vulnerable_model=False,
-    tolerate_deprecated_model=False,
-    sdk_version=None,
-    inference_tool=None,
-    serverless_inference_config=None,
-    sagemaker_session=DEFAULT_JUMPSTART_SAGEMAKER_SESSION,
-    config_name=None,
-    model_type: JumpStartModelType = JumpStartModelType.OPEN_WEIGHTS,
+        framework,
+        region,
+        version=None,
+        py_version=None,
+        instance_type=None,
+        accelerator_type=None,
+        image_scope=None,
+        container_version=None,
+        distribution=None,
+        base_framework_version=None,
+        training_compiler_config=None,
+        model_id=None,
+        model_version=None,
+        hub_arn=None,
+        tolerate_vulnerable_model=False,
+        tolerate_deprecated_model=False,
+        sdk_version=None,
+        inference_tool=None,
+        serverless_inference_config=None,
+        sagemaker_session=DEFAULT_JUMPSTART_SAGEMAKER_SESSION,
+        config_name=None,
+        model_type: JumpStartModelType = JumpStartModelType.OPEN_WEIGHTS,
 ) -> str:
     """Retrieves the ECR URI for the Docker image matching the given arguments.
 
@@ -250,10 +250,10 @@ def retrieve(
             if config.get("version_aliases").get(original_version):
                 _version = config.get("version_aliases")[original_version]
             if (
-                config.get("versions", {})
-                .get(_version, {})
-                .get("version_aliases", {})
-                .get(base_framework_version, {})
+                    config.get("versions", {})
+                            .get(_version, {})
+                            .get("version_aliases", {})
+                            .get(base_framework_version, {})
             ):
                 _base_framework_version = config.get("versions")[_version]["version_aliases"][
                     base_framework_version
@@ -290,16 +290,16 @@ def retrieve(
 
 
 def _get_image_tag(
-    container_version,
-    distribution,
-    final_image_scope,
-    framework,
-    inference_tool,
-    instance_type,
-    processor,
-    py_version,
-    tag_prefix,
-    version,
+        container_version,
+        distribution,
+        final_image_scope,
+        framework,
+        inference_tool,
+        instance_type,
+        processor,
+        py_version,
+        tag_prefix,
+        version,
 ):
     """Return image tag based on framework, container, and compute configuration(s)."""
     instance_type_family = utils.get_instance_type_family(instance_type)
@@ -311,8 +311,8 @@ def _get_image_tag(
                 "instance type",
             )
         if (
-            instance_type_family in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY
-            or final_image_scope == INFERENCE_GRAVITON
+                instance_type_family in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY
+                or final_image_scope == INFERENCE_GRAVITON
         ):
             version_to_arm64_tag_mapping = {
                 "xgboost": {
@@ -330,7 +330,7 @@ def _get_image_tag(
         tag = _format_tag(tag_prefix, processor, py_version, container_version, inference_tool)
 
         if instance_type is not None and _should_auto_select_container_version(
-            instance_type, distribution
+                instance_type, distribution
         ):
             container_versions = {
                 "tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
@@ -398,7 +398,7 @@ def _validate_instance_deprecation(framework, instance_type, version):
     """Check if instance type is deprecated for a certain framework with a certain version"""
     if utils.get_instance_type_family(instance_type) == "p2":
         if (framework == "pytorch" and Version(version) >= Version("1.13")) or (
-            framework == "tensorflow" and Version(version) >= Version("2.12")
+                framework == "tensorflow" and Version(version) >= Version("2.12")
         ):
             raise ValueError(
                 "P2 instances have been deprecated for sagemaker jobs starting PyTorch 1.13 and TensorFlow 2.12"
@@ -411,17 +411,17 @@ def _validate_for_suppported_frameworks_and_instance_type(framework, instance_ty
     """Validate if framework is supported for the instance_type"""
     # Validate for Trainium allowed frameworks
     if (
-        instance_type is not None
-        and "trn" in instance_type
-        and framework not in TRAINIUM_ALLOWED_FRAMEWORKS
+            instance_type is not None
+            and "trn" in instance_type
+            and framework not in TRAINIUM_ALLOWED_FRAMEWORKS
     ):
         _validate_framework(framework, TRAINIUM_ALLOWED_FRAMEWORKS, "framework", "Trainium")
 
     # Validate for Graviton allowed frameowrks
     if (
-        instance_type is not None
-        and utils.get_instance_type_family(instance_type) in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY
-        and framework not in GRAVITON_ALLOWED_FRAMEWORKS
+            instance_type is not None
+            and utils.get_instance_type_family(instance_type) in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY
+            and framework not in GRAVITON_ALLOWED_FRAMEWORKS
     ):
         _validate_framework(framework, GRAVITON_ALLOWED_FRAMEWORKS, "framework", "Graviton")
 
@@ -436,8 +436,8 @@ def config_for_framework(framework):
 def _get_final_image_scope(framework, instance_type, image_scope):
     """Return final image scope based on provided framework and instance type."""
     if (
-        framework in GRAVITON_ALLOWED_FRAMEWORKS
-        and utils.get_instance_type_family(instance_type) in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY
+            framework in GRAVITON_ALLOWED_FRAMEWORKS
+            and utils.get_instance_type_family(instance_type) in GRAVITON_ALLOWED_TARGET_INSTANCE_FAMILY
     ):
         return INFERENCE_GRAVITON
     if image_scope is None and framework in (XGBOOST_FRAMEWORK, SKLEARN_FRAMEWORK):
@@ -635,16 +635,16 @@ def _format_tag(tag_prefix, processor, py_version, container_version, inference_
 
 @override_pipeline_parameter_var
 def get_training_image_uri(
-    region,
-    framework,
-    framework_version=None,
-    py_version=None,
-    image_uri=None,
-    distribution=None,
-    compiler_config=None,
-    tensorflow_version=None,
-    pytorch_version=None,
-    instance_type=None,
+        region,
+        framework,
+        framework_version=None,
+        py_version=None,
+        image_uri=None,
+        distribution=None,
+        compiler_config=None,
+        tensorflow_version=None,
+        pytorch_version=None,
+        instance_type=None,
 ) -> str:
     """Retrieves the image URI for training.
 
@@ -746,3 +746,141 @@ def get_base_python_image_uri(region, py_version="310") -> str:
     repo_and_tag = repo + ":" + version
 
     return ECR_URI_TEMPLATE.format(registry=registry, hostname=hostname, repository=repo_and_tag)
+
+
+def get_latest_container_image(
+    framework: str,
+    image_scope: Optional[str] = None,
+    instance_type: Optional[str] = None,
+    py_version: Optional[str] = None,
+    region: str = "us-west-2",
+    version: Optional[str] = None,
+    accelerator_type=None,
+    container_version=None,
+    distribution=None,
+    base_framework_version=None,
+    training_compiler_config=None,
+    model_id=None,
+    model_version=None,
+    hub_arn=None,
+    sdk_version=None,
+    inference_tool=None,
+    serverless_inference_config=None,
+    config_name=None,
+) -> Tuple[str, Optional[str]]:
+    """Retrieves the container image URI for the latest version of a framework.
+
+    When ``version`` is not supplied, it is looked up from the framework's config via
+    ``_fetch_latest_version_from_config``. The image URI itself is resolved by
+    :func:`retrieve`; every argument other than the version fallback is forwarded
+    unchanged. Arguments of :func:`retrieve` that this function does not expose
+    (``tolerate_vulnerable_model``, ``tolerate_deprecated_model``, ``sagemaker_session``,
+    ``model_type``) keep the defaults of :func:`retrieve`.
+
+    Args:
+        framework (str): The name of the framework or algorithm.
+        image_scope (str): The image type, i.e. what it is used for.
+            Valid values: "training", "inference", "inference_graviton", "eia".
+            If ``accelerator_type`` is set, ``image_scope`` is ignored.
+        instance_type (str): The SageMaker instance type. For supported types, see
+            https://aws.amazon.com/sagemaker/pricing. This is required if
+            there are different images for different processor types.
+        py_version (str): The Python version. This is required if there is
+            more than one supported Python version for the given framework version.
+        region (str): The AWS region (default: "us-west-2").
+        version (str): The framework or algorithm version. If not provided, the latest
+            version found in the framework config is used (default: None).
+        accelerator_type (str): Elastic Inference accelerator type. For more, see
+            https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html.
+        container_version (str): the version of docker image.
+            Ideally the value of parameter should be created inside the framework.
+            For custom use, see the list of supported container versions:
+            https://github.com/aws/deep-learning-containers/blob/master/available_images.md
+            (default: None).
+        distribution (dict): A dictionary with information on how to run distributed training
+            (default: None).
+        base_framework_version (str): Forwarded unchanged to :func:`retrieve`
+            (default: None).
+        training_compiler_config (:class:`~sagemaker.training_compiler.TrainingCompilerConfig`):
+            A configuration class for the SageMaker Training Compiler
+            (default: None).
+        model_id (str): The JumpStart model ID for which to retrieve the image URI
+            (default: None).
+        model_version (str): The version of the JumpStart model for which to retrieve the
+            image URI (default: None).
+        hub_arn (str): The arn of the SageMaker Hub for which to retrieve
+            model details from. (Default: None).
+        sdk_version (str): the version of python-sdk that will be used in the image retrieval.
+            (default: None).
+        inference_tool (str): the tool that will be used to aid in the inference.
+            Valid values: "neuron, neuronx, None"
+            (default: None).
+        serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
+            Specifies configuration related to serverless endpoint. Instance type is
+            not provided in serverless inference. So this is used to determine processor type.
+        config_name (Optional[str]): Name of the JumpStart Model config to apply. (Default: None).
+
+    Returns:
+        Tuple[str, Optional[str]]: The image URI returned by :func:`retrieve` and the
+        version that was passed to it. The version is the caller's ``version`` when given,
+        otherwise the one looked up from the config, which is None when no latest version
+        could be determined.
+
+    Raises:
+        ValueError: If no config file exists for ``framework`` or the loaded config is empty.
+    """
+    try:
+        framework_config = config_for_framework(framework)
+    except FileNotFoundError as err:
+        # Chain explicitly so the missing config path stays visible in the traceback.
+        raise ValueError("Invalid framework {}".format(framework)) from err
+
+    if not framework_config:
+        raise ValueError("Invalid framework {}".format(framework))
+
+    if not version:
+        version = _fetch_latest_version_from_config(framework_config, image_scope)
+
+    image_uri = retrieve(
+        framework=framework,
+        region=region,
+        version=version,
+        instance_type=instance_type,
+        py_version=py_version,
+        accelerator_type=accelerator_type,
+        image_scope=image_scope,
+        container_version=container_version,
+        distribution=distribution,
+        base_framework_version=base_framework_version,
+        training_compiler_config=training_compiler_config,
+        model_id=model_id,
+        model_version=model_version,
+        hub_arn=hub_arn,
+        sdk_version=sdk_version,
+        inference_tool=inference_tool,
+        serverless_inference_config=serverless_inference_config,
+        config_name=config_name,
+    )
+    return image_uri, version
+
+
+def _fetch_latest_version_from_config(
+    framework_config: dict, image_scope: Optional[str] = None
+) -> Optional[str]:
+    """Return the latest version named by a framework config, if one can be determined.
+
+    The lookup proceeds in this order:
+
+    1. ``framework_config[image_scope]["version_aliases"]["latest"]`` when that alias exists.
+    2. Otherwise the first ``versions`` mapping found at the top level of the config,
+       under ``image_scope``, or under ``"processing"`` is inspected, and the newer of
+       its first and last keys is returned.
+
+    Only the first and last keys are compared, so the result is the true latest version
+    only if the mapping is ordered (ascending or descending).
+    NOTE(review): confirm this ordering holds for every bundled config.
+
+    Args:
+        framework_config (dict): A framework config dict.
+        image_scope (str): Scope of the image, eg: training, inference.
+
+    Returns:
+        Optional[str]: The version string, or None when there is no "latest" alias and no
+        usable ``versions`` mapping (missing, empty, or a top-level mapping whose first or
+        last key is the literal ``"latest"``).
+
+    Raises:
+        packaging.version.InvalidVersion: If a compared key is not a parseable version.
+    """
+    # ``or {}`` tolerates a scope entry that is present but None/empty.
+    scope_config = framework_config.get(image_scope) or {}
+    aliases = scope_config.get("version_aliases") or {}
+    if "latest" in aliases:
+        return aliases["latest"]
+
+    if "versions" in framework_config:
+        versions = list(framework_config["versions"])
+        # A literal "latest" key at either end cannot be compared as a version.
+        if versions and "latest" in (versions[0], versions[-1]):
+            return None
+    elif image_scope is not None and "versions" in scope_config:
+        versions = list(scope_config["versions"])
+    elif "versions" in (framework_config.get("processing") or {}):
+        versions = list(framework_config["processing"]["versions"])
+    else:
+        return None
+
+    # An empty mapping has no first/last key to compare.
+    if not versions:
+        return None
+
+    first, last = versions[0], versions[-1]
+    if not (first and last):
+        return None
+
+    def _sort_key(key):
+        # "2.x" style keys are ranked by the part before the ".x" suffix, so they can be
+        # compared with each other and with fully qualified versions alike.
+        if key.endswith(".x"):
+            key = key[: -len(".x")]
+        return Version(key)
+
+    # Always return a key that exists in the config; ties favour the first key.
+    return first if _sort_key(first) >= _sort_key(last) else last
@@ -0,0 +1 @@
+{"mpi_additional_options": ["-x", "MASTER_ADDR=algo-1", "-x", "MASTER_PORT=7777"], "_type": "mpi"}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"mpi_additional_options": ["-x", "MASTER_ADDR=algo-1", "-x", "MASTER_PORT=7777"], "_type": "mpi"}`