diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
index 8e53bf6f83..05c6a00961 100644
--- a/src/sagemaker/jumpstart/types.py
+++ b/src/sagemaker/jumpstart/types.py
@@ -744,12 +744,12 @@ def _get_regional_property(


 class JumpStartBenchmarkStat(JumpStartDataHolderType):
-    """Data class JumpStart benchmark stats."""
+    """Data class JumpStart benchmark stat."""

     __slots__ = ["name", "value", "unit"]

     def __init__(self, spec: Dict[str, Any]):
-        """Initializes a JumpStartBenchmarkStat object
+        """Initializes a JumpStartBenchmarkStat object.

         Args:
             spec (Dict[str, Any]): Dictionary representation of benchmark stat.
@@ -858,7 +858,7 @@ class JumpStartMetadataBaseFields(JumpStartDataHolderType):
         "model_subscription_link",
     ]

-    def __init__(self, fields: Optional[Dict[str, Any]]):
+    def __init__(self, fields: Dict[str, Any]):
         """Initializes a JumpStartMetadataFields object.

         Args:
@@ -877,7 +877,7 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
         self.version: str = json_obj.get("version")
         self.min_sdk_version: str = json_obj.get("min_sdk_version")
         self.incremental_training_supported: bool = bool(
-            json_obj.get("incremental_training_supported")
+            json_obj.get("incremental_training_supported", False)
         )
         self.hosting_ecr_specs: Optional[JumpStartECRSpecs] = (
             JumpStartECRSpecs(json_obj["hosting_ecr_specs"])
@@ -1038,7 +1038,7 @@ class JumpStartConfigComponent(JumpStartMetadataBaseFields):

     __slots__ = slots + JumpStartMetadataBaseFields.__slots__

-    def __init__(  # pylint: disable=super-init-not-called
+    def __init__(
         self,
         component_name: str,
         component: Optional[Dict[str, Any]],
@@ -1049,7 +1049,10 @@ def __init__(  # pylint: disable=super-init-not-called
             component_name (str): Name of the component.
             component (Dict[str, Any]):
                 Dictionary representation of the config component.
+        Raises:
+            ValueError: If the component field is invalid.
         """
+        super().__init__(component)
         self.component_name = component_name
         self.from_json(component)
@@ -1080,7 +1083,7 @@ def __init__(
         self,
         base_fields: Dict[str, Any],
         config_components: Dict[str, JumpStartConfigComponent],
-        benchmark_metrics: Dict[str, JumpStartBenchmarkStat],
+        benchmark_metrics: Dict[str, List[JumpStartBenchmarkStat]],
     ):
         """Initializes a JumpStartMetadataConfig object from its json representation.

@@ -1089,12 +1092,12 @@ def __init__(
                 The default base fields that are used to construct the final resolved config.
             config_components (Dict[str, JumpStartConfigComponent]):
                 The list of components that are used to construct the resolved config.
-            benchmark_metrics (Dict[str, JumpStartBenchmarkStat]):
+            benchmark_metrics (Dict[str, List[JumpStartBenchmarkStat]]):
                 The dictionary of benchmark metrics with name being the key.
         """
         self.base_fields = base_fields
         self.config_components: Dict[str, JumpStartConfigComponent] = config_components
-        self.benchmark_metrics: Dict[str, JumpStartBenchmarkStat] = benchmark_metrics
+        self.benchmark_metrics: Dict[str, List[JumpStartBenchmarkStat]] = benchmark_metrics
         self.resolved_metadata_config: Optional[Dict[str, Any]] = None

     def to_json(self) -> Dict[str, Any]:
@@ -1104,7 +1107,7 @@ def to_json(self) -> Dict[str, Any]:

     @property
     def resolved_config(self) -> Dict[str, Any]:
-        """Returns the final config that is resolved from the list of components.
+        """Returns the final config that is resolved from the components map.

         Construct the final config by applying the list of configs from list index,
         and apply to the base default fields in the current model specs.
@@ -1139,7 +1142,7 @@ def __init__(

         Args:
             configs (Dict[str, JumpStartMetadataConfig]):
-                List of configs that the current model has.
+                The map of JumpStartMetadataConfig objects, with config name being the key.
             config_rankings (JumpStartConfigRanking):
                 Config ranking class represents the ranking of the configs in the model.
             scope (JumpStartScriptScope):
@@ -1158,19 +1161,30 @@ def get_top_config_from_ranking(
         self,
         ranking_name: str = JumpStartConfigRankingName.DEFAULT,
         instance_type: Optional[str] = None,
-    ) -> JumpStartMetadataConfig:
-        """Gets the best the config based on config ranking."""
+    ) -> Optional[JumpStartMetadataConfig]:
+        """Gets the best config based on config ranking.
+
+        Args:
+            ranking_name (str):
+                The ranking name that config priority is based on.
+            instance_type (Optional[str]):
+                The instance type on which the config selection is based.
+
+        Raises:
+            ValueError: If the config exists but is missing config ranking.
+            NotImplementedError: If the scope is unrecognized.
+        """
         if self.configs and (
             not self.config_rankings or not self.config_rankings.get(ranking_name)
         ):
-            raise ValueError("Config exists but missing config ranking.")
+            raise ValueError(f"Config exists but missing config ranking {ranking_name}.")

         if self.scope == JumpStartScriptScope.INFERENCE:
             instance_type_attribute = "supported_inference_instance_types"
         elif self.scope == JumpStartScriptScope.TRAINING:
             instance_type_attribute = "supported_training_instance_types"
         else:
-            raise ValueError(f"Unknown script scope {self.scope}")
+            raise NotImplementedError(f"Unknown script scope {self.scope}")

         rankings = self.config_rankings.get(ranking_name)
         for config_name in rankings.rankings:
@@ -1198,12 +1212,13 @@ class JumpStartModelSpecs(JumpStartMetadataBaseFields):

     __slots__ = JumpStartMetadataBaseFields.__slots__ + slots

-    def __init__(self, spec: Dict[str, Any]):  # pylint: disable=super-init-not-called
+    def __init__(self, spec: Dict[str, Any]):
         """Initializes a JumpStartModelSpecs object from its json representation.

         Args:
             spec (Dict[str, Any]): Dictionary representation of spec.
         """
+        super().__init__(spec)
         self.from_json(spec)
         if self.inference_configs and self.inference_configs.get_top_config_from_ranking():
             super().from_json(self.inference_configs.get_top_config_from_ranking().resolved_config)
@@ -1245,8 +1260,8 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
                         ),
                         (
                             {
-                                stat_name: JumpStartBenchmarkStat(stat)
-                                for stat_name, stat in config.get("benchmark_metrics").items()
+                                stat_name: [JumpStartBenchmarkStat(stat) for stat in stats]
+                                for stat_name, stats in config.get("benchmark_metrics").items()
                             }
                             if config and config.get("benchmark_metrics")
                             else None
@@ -1297,8 +1312,8 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
                         ),
                         (
                             {
-                                stat_name: JumpStartBenchmarkStat(stat)
-                                for stat_name, stat in config.get("benchmark_metrics").items()
+                                stat_name: [JumpStartBenchmarkStat(stat) for stat in stats]
+                                for stat_name, stats in config.get("benchmark_metrics").items()
                             }
                             if config and config.get("benchmark_metrics")
                             else None
@@ -1330,13 +1345,26 @@ def set_config(
             config_name (str): Name of the config.
             scope (JumpStartScriptScope, optional):
                 Scope of the config. Defaults to JumpStartScriptScope.INFERENCE.
+
+        Raises:
+            ValueError: If the scope is not supported, or the config name cannot be found.
""" if scope == JumpStartScriptScope.INFERENCE: - super().from_json(self.inference_configs.configs[config_name].resolved_config) + metadata_configs = self.inference_configs elif scope == JumpStartScriptScope.TRAINING and self.training_supported: - super().from_json(self.training_configs.configs[config_name].resolved_config) + metadata_configs = self.training_configs else: - raise ValueError(f"Unknown Jumpstart Script scope {scope}.") + raise ValueError(f"Unknown Jumpstart script scope {scope}.") + + config_object = metadata_configs.configs.get(config_name) + if not config_object: + error_msg = f"Cannot find Jumpstart config name {config_name}. " + config_names = list(metadata_configs.configs.keys()) + if config_names: + error_msg += f"List of config names that is supported by the model: {config_names}" + raise ValueError(error_msg) + + super().from_json(config_object.resolved_config) def supports_prepacked_inference(self) -> bool: """Returns True if the model has a prepacked inference artifact.""" diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py index 2b6856b1f3..f165a513a9 100644 --- a/tests/unit/sagemaker/jumpstart/constants.py +++ b/tests/unit/sagemaker/jumpstart/constants.py @@ -6270,6 +6270,10 @@ "framework_version": "1.5.0", "py_version": "py3", }, + "default_inference_instance_type": "ml.p2.xlarge", + "supported_inference_instance_type": ["ml.p2.xlarge", "ml.p3.xlarge"], + "default_training_instance_type": "ml.p2.xlarge", + "supported_training_instance_type": ["ml.p2.xlarge", "ml.p3.xlarge"], "hosting_artifact_key": "pytorch-infer/infer-pytorch-eqa-bert-base-cased.tar.gz", "hosting_script_key": "source-directory-tarballs/pytorch/inference/eqa/v1.0.0/sourcedir.tar.gz", "inference_vulnerable": False, @@ -7658,25 +7662,25 @@ "inference_configs": { "neuron-inference": { "benchmark_metrics": { - "ml.inf2.2xlarge": {"name": "Latency", "value": "100", "unit": "Tokens/S"} + "ml.inf2.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}] }, - "component_names": ["neuron-base"], + "component_names": ["neuron-inference"], }, "neuron-inference-budget": { "benchmark_metrics": { - "ml.inf2.2xlarge": {"name": "Latency", "value": "100", "unit": "Tokens/S"} + "ml.inf2.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}] }, "component_names": ["neuron-base"], }, "gpu-inference-budget": { "benchmark_metrics": { - "ml.p3.2xlarge": {"name": "Latency", "value": "100", "unit": "Tokens/S"} + "ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}] }, "component_names": ["gpu-inference-budget"], }, "gpu-inference": { "benchmark_metrics": { - "ml.p3.2xlarge": {"name": "Latency", "value": "100", "unit": "Tokens/S"} + "ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}] }, "component_names": ["gpu-inference"], }, @@ -7686,7 +7690,13 @@ "supported_inference_instance_types": ["ml.inf2.xlarge", "ml.inf2.2xlarge"] }, "neuron-inference": { + "default_inference_instance_type": "ml.inf2.xlarge", "supported_inference_instance_types": ["ml.inf2.xlarge", "ml.inf2.2xlarge"], + "hosting_ecr_specs": { + "framework": "huggingface-llm-neuronx", + "framework_version": "0.0.17", + "py_version": "py310", + }, "hosting_artifact_key": "artifacts/meta-textgeneration-llama-2-7b/neuron-inference/model/", "hosting_instance_type_variants": { "regional_aliases": { @@ -7738,27 +7748,27 @@ "training_configs": { "neuron-training": { "benchmark_metrics": { - "ml.tr1n1.2xlarge": {"name": "Latency", "value": "100", 
"unit": "Tokens/S"}, - "ml.tr1n1.4xlarge": {"name": "Latency", "value": "50", "unit": "Tokens/S"}, + "ml.tr1n1.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}], + "ml.tr1n1.4xlarge": [{"name": "Latency", "value": "50", "unit": "Tokens/S"}], }, "component_names": ["neuron-training"], }, "neuron-training-budget": { "benchmark_metrics": { - "ml.tr1n1.2xlarge": {"name": "Latency", "value": "100", "unit": "Tokens/S"}, - "ml.tr1n1.4xlarge": {"name": "Latency", "value": "50", "unit": "Tokens/S"}, + "ml.tr1n1.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}], + "ml.tr1n1.4xlarge": [{"name": "Latency", "value": "50", "unit": "Tokens/S"}], }, "component_names": ["neuron-training-budget"], }, "gpu-training": { "benchmark_metrics": { - "ml.p3.2xlarge": {"name": "Latency", "value": "200", "unit": "Tokens/S"}, + "ml.p3.2xlarge": [{"name": "Latency", "value": "200", "unit": "Tokens/S"}], }, "component_names": ["gpu-training"], }, "gpu-training-budget": { "benchmark_metrics": { - "ml.p3.2xlarge": {"name": "Latency", "value": "100", "unit": "Tokens/S"} + "ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}] }, "component_names": ["gpu-training-budget"], }, diff --git a/tests/unit/sagemaker/jumpstart/test_types.py b/tests/unit/sagemaker/jumpstart/test_types.py index 3048bbc320..5ca01c3c52 100644 --- a/tests/unit/sagemaker/jumpstart/test_types.py +++ b/tests/unit/sagemaker/jumpstart/test_types.py @@ -12,6 +12,7 @@ # language governing permissions and limitations under the License. from __future__ import absolute_import import copy +import pytest from sagemaker.jumpstart.enums import JumpStartScriptScope from sagemaker.jumpstart.types import ( JumpStartBenchmarkStat, @@ -934,9 +935,9 @@ def test_inference_configs_parsing(): assert specs1.incremental_training_supported assert specs1.hosting_ecr_specs == JumpStartECRSpecs( { - "framework": "pytorch", - "framework_version": "1.5.0", - "py_version": "py3", + "framework": "huggingface-llm-neuronx", + "framework_version": "0.0.17", + "py_version": "py310", } ) assert specs1.training_ecr_specs == JumpStartECRSpecs( @@ -946,7 +947,10 @@ def test_inference_configs_parsing(): "py_version": "py3", } ) - assert specs1.hosting_artifact_key == "pytorch-infer/infer-pytorch-ic-mobilenet-v2.tar.gz" + assert ( + specs1.hosting_artifact_key + == "artifacts/meta-textgeneration-llama-2-7b/neuron-inference/model/" + ) assert specs1.training_artifact_key == "pytorch-training/train-pytorch-ic-mobilenet-v2.tar.gz" assert ( specs1.hosting_script_key @@ -1019,16 +1023,58 @@ def test_inference_configs_parsing(): config = specs1.inference_configs.get_top_config_from_ranking() assert config.benchmark_metrics == { - "ml.inf2.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.inf2.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] } assert len(config.config_components) == 1 - assert config.config_components["neuron-base"] == JumpStartConfigComponent( - "neuron-base", - {"supported_inference_instance_types": ["ml.inf2.xlarge", "ml.inf2.2xlarge"]}, + assert config.config_components["neuron-inference"] == JumpStartConfigComponent( + "neuron-inference", + { + "default_inference_instance_type": "ml.inf2.xlarge", + "supported_inference_instance_types": ["ml.inf2.xlarge", "ml.inf2.2xlarge"], + "hosting_ecr_specs": { + "framework": "huggingface-llm-neuronx", + "framework_version": "0.0.17", + "py_version": "py310", + }, + "hosting_artifact_key": 
"artifacts/meta-textgeneration-llama-2-7b/neuron-inference/model/", + "hosting_instance_type_variants": { + "regional_aliases": { + "us-west-2": { + "neuron-ecr-uri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/" + "huggingface-pytorch-hosting:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04" + } + }, + "variants": {"inf2": {"regional_properties": {"image_uri": "$neuron-ecr-uri"}}}, + }, + }, ) - assert list(config.config_components.keys()) == ["neuron-base"] + assert list(config.config_components.keys()) == ["neuron-inference"] + + +def test_set_inference_configs(): + spec = {**BASE_SPEC, **INFERENCE_CONFIGS, **INFERENCE_CONFIG_RANKINGS} + specs1 = JumpStartModelSpecs(spec) + + assert list(specs1.inference_config_components.keys()) == [ + "neuron-base", + "neuron-inference", + "neuron-budget", + "gpu-inference", + "gpu-inference-budget", + ] + + with pytest.raises(ValueError) as error: + specs1.set_config("invalid_name") + assert "Cannot find Jumpstart config name invalid_name." + "List of config names that is supported by the model: " + "['neuron-inference', 'neuron-inference-budget', " + "'gpu-inference-budget', 'gpu-inference']" in str(error.value) + + assert specs1.supported_inference_instance_types == ["ml.inf2.xlarge", "ml.inf2.2xlarge"] + specs1.set_config("gpu-inference") + assert specs1.supported_inference_instance_types == ["ml.p2.xlarge", "ml.p3.2xlarge"] def test_training_configs_parsing(): @@ -1133,12 +1179,12 @@ def test_training_configs_parsing(): config = specs1.training_configs.get_top_config_from_ranking() assert config.benchmark_metrics == { - "ml.tr1n1.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ), - "ml.tr1n1.4xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "50", "unit": "Tokens/S"} - ), + "ml.tr1n1.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ], + "ml.tr1n1.4xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "50", "unit": "Tokens/S"}) + ], } assert len(config.config_components) == 1 assert config.config_components["neuron-training"] == JumpStartConfigComponent( @@ -1192,3 +1238,13 @@ def test_set_training_config(): specs1.training_artifact_key == "artifacts/meta-textgeneration-llama-2-7b/gpu-training-budget/model/" ) + + with pytest.raises(ValueError) as error: + specs1.set_config("invalid_name", scope=JumpStartScriptScope.TRAINING) + assert "Cannot find Jumpstart config name invalid_name." 
+ "List of config names that is supported by the model: " + "['neuron-training', 'neuron-training-budget', " + "'gpu-training-budget', 'gpu-training']" in str(error.value) + + with pytest.raises(ValueError) as error: + specs1.set_config("invalid_name", scope="unknown scope") diff --git a/tests/unit/sagemaker/jumpstart/test_utils.py b/tests/unit/sagemaker/jumpstart/test_utils.py index e7a7d522c3..c1ea8abcb8 100644 --- a/tests/unit/sagemaker/jumpstart/test_utils.py +++ b/tests/unit/sagemaker/jumpstart/test_utils.py @@ -1598,24 +1598,24 @@ def test_get_jumpstart_benchmark_stats_full_list( "mock-region", "mock-model", "mock-model-version", config_names=None ) == { "neuron-inference": { - "ml.inf2.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.inf2.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, "neuron-inference-budget": { - "ml.inf2.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.inf2.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, "gpu-inference-budget": { - "ml.p3.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.p3.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, "gpu-inference": { - "ml.p3.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.p3.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, } @@ -1633,14 +1633,14 @@ def test_get_jumpstart_benchmark_stats_partial_list( config_names=["neuron-inference-budget", "gpu-inference-budget"], ) == { "neuron-inference-budget": { - "ml.inf2.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.inf2.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, "gpu-inference-budget": { - "ml.p3.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.p3.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, } @@ -1658,9 +1658,9 @@ def test_get_jumpstart_benchmark_stats_single_stat( config_names=["neuron-inference-budget"], ) == { "neuron-inference-budget": { - "ml.inf2.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.inf2.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] } } @@ -1695,16 +1695,16 @@ def test_get_jumpstart_benchmark_stats_training( config_names=["neuron-training", "gpu-training-budget"], ) == { "neuron-training": { - "ml.tr1n1.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ), - "ml.tr1n1.4xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "50", "unit": "Tokens/S"} - ), + "ml.tr1n1.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ], + "ml.tr1n1.4xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "50", "unit": "Tokens/S"}) + ], }, "gpu-training-budget": { - "ml.p3.2xlarge": JumpStartBenchmarkStat( - {"name": "Latency", "value": "100", "unit": "Tokens/S"} - ) + "ml.p3.2xlarge": [ + JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"}) + ] }, }