
Commit 149edb7

makungaj1 and Jonathan Makunga authored
fix benchmark feature read-only apis (#4675)
* Rearrange benchmark metric table * Refactoring * Refactoring * Refactoring * Refactoring * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Debug * Refactoring * Refactoring * Refactoring * Refactoring * Refactoring * Add Unit tests * Refactoring * Refactoring * hide index from DataFrame --------- Co-authored-by: Jonathan Makunga <[email protected]>
1 parent 2b71ab4 commit 149edb7

File tree

7 files changed, +287 -63 lines changed


src/sagemaker/jumpstart/model.py

Lines changed: 22 additions & 4 deletions
@@ -465,13 +465,13 @@ def benchmark_metrics(self) -> pd.DataFrame:
             Benchmark Metrics: Pandas DataFrame object.
         """
         df = pd.DataFrame(self._get_deployment_configs_benchmarks_data())
-        default_mask = df.apply(lambda row: any("Default" in str(val) for val in row), axis=1)
-        sorted_df = pd.concat([df[default_mask], df[~default_mask]])
-        return sorted_df
+        blank_index = [""] * len(df)
+        df.index = blank_index
+        return df
 
     def display_benchmark_metrics(self, *args, **kwargs) -> None:
         """Display deployment configs benchmark metrics."""
-        print(self.benchmark_metrics.to_markdown(index=False), *args, **kwargs)
+        print(self.benchmark_metrics.to_markdown(index=False, floatfmt=".2f"), *args, **kwargs)
 
     def list_deployment_configs(self) -> List[Dict[str, Any]]:
         """List deployment configs for ``This`` model.
@@ -911,16 +911,34 @@ def _get_deployment_configs(
                 )
             )
 
+            config_components = metadata_config.config_components.get(config_name)
+            image_uri = (
+                (
+                    config_components.hosting_instance_type_variants.get("regional_aliases", {})
+                    .get(self.region, {})
+                    .get("alias_ecr_uri_1")
+                )
+                if config_components
+                else self.image_uri
+            )
+
             init_kwargs = get_init_kwargs(
+                config_name=config_name,
                 model_id=self.model_id,
                 instance_type=instance_type_to_use,
                 sagemaker_session=self.sagemaker_session,
+                image_uri=image_uri,
+                region=self.region,
+                model_version=self.model_version,
             )
             deploy_kwargs = get_deploy_kwargs(
                 model_id=self.model_id,
                 instance_type=instance_type_to_use,
                 sagemaker_session=self.sagemaker_session,
+                region=self.region,
+                model_version=self.model_version,
             )
+
             deployment_config_metadata = DeploymentConfigMetadata(
                 config_name,
                 metadata_config.benchmark_metrics,

src/sagemaker/jumpstart/types.py

Lines changed: 2 additions & 1 deletion
@@ -746,7 +746,7 @@ def _get_regional_property(
 class JumpStartBenchmarkStat(JumpStartDataHolderType):
     """Data class JumpStart benchmark stat."""
 
-    __slots__ = ["name", "value", "unit"]
+    __slots__ = ["name", "value", "unit", "concurrency"]
 
     def __init__(self, spec: Dict[str, Any]):
         """Initializes a JumpStartBenchmarkStat object.
@@ -765,6 +765,7 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
         self.name: str = json_obj["name"]
         self.value: str = json_obj["value"]
         self.unit: Union[int, str] = json_obj["unit"]
+        self.concurrency: Union[int, str] = json_obj["concurrency"]
 
     def to_json(self) -> Dict[str, Any]:
         """Returns json representation of JumpStartBenchmarkStat object."""

src/sagemaker/jumpstart/utils.py

Lines changed: 78 additions & 25 deletions
@@ -1082,7 +1082,9 @@ def add_instance_rate_stats_to_benchmark_metrics(
 
             if not benchmark_metric_stats:
                 benchmark_metric_stats = []
-            benchmark_metric_stats.append(JumpStartBenchmarkStat(instance_type_rate))
+            benchmark_metric_stats.append(
+                JumpStartBenchmarkStat({"concurrency": None, **instance_type_rate})
+            )
 
             final_benchmark_metrics[instance_type] = benchmark_metric_stats
         except ClientError as e:
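
This first utils.py hunk supplies exactly that default: instance-rate dicts coming back from the pricing lookup carry no "concurrency" field, so the code splices in None via dict unpacking before constructing the stat. A small sketch of the pattern, with an illustrative rate payload:

# Illustrative instance-rate payload without a "concurrency" key.
instance_type_rate = {"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}

# Later keys win in a dict literal, so an explicit "concurrency" in the payload
# would override the None default supplied here.
spec = {"concurrency": None, **instance_type_rate}
print(spec)
# {'concurrency': None, 'name': 'Instance Rate', 'value': '0.0083000000', 'unit': 'USD/Hrs'}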
@@ -1127,43 +1129,94 @@ def get_metrics_from_deployment_configs(
     if not deployment_configs:
         return {}
 
-    data = {"Instance Type": [], "Config Name": []}
+    data = {"Instance Type": [], "Config Name": [], "Concurrent Users": []}
     instance_rate_data = {}
     for index, deployment_config in enumerate(deployment_configs):
         benchmark_metrics = deployment_config.benchmark_metrics
         if not deployment_config.deployment_args or not benchmark_metrics:
             continue
 
-        for inner_index, current_instance_type in enumerate(benchmark_metrics):
-            current_instance_type_metrics = benchmark_metrics[current_instance_type]
-
-            data["Config Name"].append(deployment_config.deployment_config_name)
-            instance_type_to_display = (
-                f"{current_instance_type} (Default)"
-                if index == 0
-                and current_instance_type == deployment_config.deployment_args.default_instance_type
-                else current_instance_type
+        for current_instance_type, current_instance_type_metrics in benchmark_metrics.items():
+            instance_type_rate, concurrent_users = _normalize_benchmark_metrics(
+                current_instance_type_metrics
             )
-            data["Instance Type"].append(instance_type_to_display)
-
-            for metric in current_instance_type_metrics:
-                column_name = f"{metric.name} ({metric.unit})"
-
-                if metric.name.lower() == "instance rate":
-                    if column_name not in instance_rate_data:
-                        instance_rate_data[column_name] = []
-                    instance_rate_data[column_name].append(metric.value)
-                else:
-                    if column_name not in data:
-                        data[column_name] = []
-                    for _ in range(len(data[column_name]), inner_index):
-                        data[column_name].append(" - ")
+
+            for concurrent_user, metrics in concurrent_users.items():
+                instance_type_to_display = (
+                    f"{current_instance_type} (Default)"
+                    if index == 0
+                    and int(concurrent_user) == 1
+                    and current_instance_type
+                    == deployment_config.deployment_args.default_instance_type
+                    else current_instance_type
+                )
+
+                data["Config Name"].append(deployment_config.deployment_config_name)
+                data["Instance Type"].append(instance_type_to_display)
+                data["Concurrent Users"].append(concurrent_user)
+
+                if instance_type_rate:
+                    instance_rate_column_name = (
+                        f"{instance_type_rate.name} ({instance_type_rate.unit})"
+                    )
+                    instance_rate_data[instance_rate_column_name] = instance_rate_data.get(
+                        instance_rate_column_name, []
+                    )
+                    instance_rate_data[instance_rate_column_name].append(instance_type_rate.value)
+
+                for metric in metrics:
+                    column_name = _normalize_benchmark_metric_column_name(metric.name)
+                    data[column_name] = data.get(column_name, [])
                     data[column_name].append(metric.value)
 
     data = {**data, **instance_rate_data}
     return data
 
 
+def _normalize_benchmark_metric_column_name(name: str) -> str:
+    """Normalizes benchmark metric column name.
+
+    Args:
+        name (str): Name of the metric.
+    Returns:
+        str: Normalized metric column name.
+    """
+    if "latency" in name.lower():
+        name = "Latency for each user (TTFT in ms)"
+    elif "throughput" in name.lower():
+        name = "Throughput per user (token/seconds)"
+    return name
+
+
+def _normalize_benchmark_metrics(
+    benchmark_metric_stats: List[JumpStartBenchmarkStat],
+) -> Tuple[JumpStartBenchmarkStat, Dict[str, List[JumpStartBenchmarkStat]]]:
+    """Normalizes benchmark metrics dict.
+
+    Args:
+        benchmark_metric_stats (List[JumpStartBenchmarkStat]):
+            List of benchmark metrics stats.
+    Returns:
+        Tuple[JumpStartBenchmarkStat, Dict[str, List[JumpStartBenchmarkStat]]]:
+            Normalized benchmark metrics dict.
+    """
+    instance_type_rate = None
+    concurrent_users = {}
+    for current_instance_type_metric in benchmark_metric_stats:
+        if current_instance_type_metric.name.lower() == "instance rate":
+            instance_type_rate = current_instance_type_metric
+        elif current_instance_type_metric.concurrency not in concurrent_users:
+            concurrent_users[current_instance_type_metric.concurrency] = [
+                current_instance_type_metric
+            ]
+        else:
+            concurrent_users[current_instance_type_metric.concurrency].append(
+                current_instance_type_metric
+            )
+
+    return instance_type_rate, concurrent_users
+
+
 def deployment_config_response_data(
     deployment_configs: Optional[List[DeploymentConfigMetadata]],
 ) -> List[Dict[str, Any]]:
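
The rewritten get_metrics_from_deployment_configs leans on the new _normalize_benchmark_metrics helper: for each instance type it pulls the instance-rate stat out into its own column and buckets the remaining stats by concurrency, emitting one table row per (instance type, concurrent users) pair. A condensed sketch of that grouping step, using a namedtuple and made-up numbers rather than real JumpStartBenchmarkStat objects:

from collections import namedtuple

Stat = namedtuple("Stat", ["name", "value", "unit", "concurrency"])

stats = [
    Stat("Latency", "100", "ms", 1),
    Stat("Throughput", "1000", "tokens/sec/user", 1),
    Stat("Latency", "180", "ms", 4),
    Stat("Instance Rate", "0.0083000000", "USD/Hrs", None),
]

instance_rate = None
by_concurrency = {}
for stat in stats:
    if stat.name.lower() == "instance rate":
        instance_rate = stat  # surfaces as its own column, appended last
    else:
        by_concurrency.setdefault(stat.concurrency, []).append(stat)

print(instance_rate.value)  # "0.0083000000"
print({k: [s.name for s in v] for k, v in by_concurrency.items()})
# {1: ['Latency', 'Throughput'], 4: ['Latency']}

The real helper builds the buckets with an if/elif/else rather than setdefault, but the grouping is the same; column names then pass through _normalize_benchmark_metric_column_name so latency and throughput land under fixed, human-readable headers.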

tests/unit/sagemaker/jumpstart/constants.py

Lines changed: 39 additions & 13 deletions
@@ -7662,25 +7662,33 @@
     "inference_configs": {
         "neuron-inference": {
             "benchmark_metrics": {
-                "ml.inf2.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
+                "ml.inf2.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ]
             },
             "component_names": ["neuron-inference"],
         },
         "neuron-inference-budget": {
             "benchmark_metrics": {
-                "ml.inf2.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
+                "ml.inf2.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ]
             },
             "component_names": ["neuron-base"],
         },
         "gpu-inference-budget": {
             "benchmark_metrics": {
-                "ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
+                "ml.p3.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ]
             },
             "component_names": ["gpu-inference-budget"],
         },
         "gpu-inference": {
             "benchmark_metrics": {
-                "ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
+                "ml.p3.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ]
             },
             "component_names": ["gpu-inference"],
         },
@@ -7748,8 +7756,12 @@
     "training_configs": {
         "neuron-training": {
             "benchmark_metrics": {
-                "ml.tr1n1.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}],
-                "ml.tr1n1.4xlarge": [{"name": "Latency", "value": "50", "unit": "Tokens/S"}],
+                "ml.tr1n1.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ],
+                "ml.tr1n1.4xlarge": [
+                    {"name": "Latency", "value": "50", "unit": "Tokens/S", "concurrency": 1}
+                ],
             },
             "component_names": ["neuron-training"],
             "default_inference_config": "neuron-inference",
@@ -7759,8 +7771,12 @@
         },
         "neuron-training-budget": {
             "benchmark_metrics": {
-                "ml.tr1n1.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}],
-                "ml.tr1n1.4xlarge": [{"name": "Latency", "value": "50", "unit": "Tokens/S"}],
+                "ml.tr1n1.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ],
+                "ml.tr1n1.4xlarge": [
+                    {"name": "Latency", "value": "50", "unit": "Tokens/S", "concurrency": 1}
+                ],
             },
             "component_names": ["neuron-training-budget"],
             "default_inference_config": "neuron-inference-budget",
@@ -7770,7 +7786,9 @@
         },
         "gpu-training": {
             "benchmark_metrics": {
-                "ml.p3.2xlarge": [{"name": "Latency", "value": "200", "unit": "Tokens/S"}],
+                "ml.p3.2xlarge": [
+                    {"name": "Latency", "value": "200", "unit": "Tokens/S", "concurrency": "1"}
+                ],
             },
             "component_names": ["gpu-training"],
             "default_inference_config": "gpu-inference",
@@ -7780,7 +7798,9 @@
         },
         "gpu-training-budget": {
             "benchmark_metrics": {
-                "ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
+                "ml.p3.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": "1"}
+                ]
             },
             "component_names": ["gpu-training-budget"],
             "default_inference_config": "gpu-inference-budget",
@@ -7966,7 +7986,9 @@
             "ContainerStartupHealthCheckTimeout": None,
         },
         "AccelerationConfigs": None,
-        "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
+        "BenchmarkMetrics": [
+            {"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs", "concurrency": 1}
+        ],
     },
     {
         "DeploymentConfigName": "neuron-inference-budget",
@@ -7998,7 +8020,9 @@
             "ContainerStartupHealthCheckTimeout": None,
         },
         "AccelerationConfigs": None,
-        "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
+        "BenchmarkMetrics": [
+            {"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs", "concurrency": 1}
+        ],
     },
     {
         "DeploymentConfigName": "gpu-inference-budget",
@@ -8030,7 +8054,9 @@
             "ContainerStartupHealthCheckTimeout": None,
         },
         "AccelerationConfigs": None,
-        "BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
+        "BenchmarkMetrics": [
+            {"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs", "concurrency": 1}
+        ],
     },
     {
         "DeploymentConfigName": "gpu-inference",

tests/unit/sagemaker/jumpstart/test_types.py

Lines changed: 9 additions & 3 deletions
@@ -1027,7 +1027,9 @@ def test_inference_configs_parsing():
 
     assert config.benchmark_metrics == {
         "ml.inf2.2xlarge": [
-            JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"})
+            JumpStartBenchmarkStat(
+                {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+            ),
         ]
     }
     assert len(config.config_components) == 1
@@ -1191,10 +1193,14 @@ def test_training_configs_parsing():
 
     assert config.benchmark_metrics == {
         "ml.tr1n1.2xlarge": [
-            JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"})
+            JumpStartBenchmarkStat(
+                {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+            ),
         ],
         "ml.tr1n1.4xlarge": [
-            JumpStartBenchmarkStat({"name": "Latency", "value": "50", "unit": "Tokens/S"})
+            JumpStartBenchmarkStat(
+                {"name": "Latency", "value": "50", "unit": "Tokens/S", "concurrency": 1}
+            ),
         ],
     }
     assert len(config.config_components) == 1
