Skip to content

Commit 4bb666b

Browse files
committed
feat: support model deploy to endpoint with autoscaling metrics
1 parent 03d7885 commit 4bb666b

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

google/cloud/aiplatform/models.py

+22
Original file line numberDiff line numberDiff line change
@@ -2048,6 +2048,8 @@ def deploy(
20482048
encryption_spec_key_name: Optional[str] = None,
20492049
sync=True,
20502050
deploy_request_timeout: Optional[float] = None,
2051+
autoscaling_target_cpu_utilization: Optional[int] = None,
2052+
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
20512053
) -> Endpoint:
20522054
"""Deploys model to endpoint. Endpoint will be created if unspecified.
20532055
@@ -2132,6 +2134,13 @@ def deploy(
21322134
be immediately returned and synced when the Future has completed.
21332135
deploy_request_timeout (float):
21342136
Optional. The timeout for the deploy request in seconds.
2137+
autoscaling_target_cpu_utilization (int):
2138+
Optional. Target CPU Utilization to use for Autoscaling Replicas.
2139+
A default value of 60 will be used if not specified.
2140+
autoscaling_target_accelerator_duty_cycle (int):
2141+
Optional. Target Accelerator Duty Cycle.
2142+
If specified, accelerator_type and accelerator_count must also be set.
2143+
A default value of 60 will be used if not specified.
21352144
Returns:
21362145
endpoint ("Endpoint"):
21372146
Endpoint with the deployed model.
@@ -2166,6 +2175,8 @@ def deploy(
21662175
or initializer.global_config.encryption_spec_key_name,
21672176
sync=sync,
21682177
deploy_request_timeout=deploy_request_timeout,
2178+
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
2179+
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
21692180
)
21702181

21712182
@base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False)
@@ -2187,6 +2198,8 @@ def _deploy(
21872198
encryption_spec_key_name: Optional[str] = None,
21882199
sync: bool = True,
21892200
deploy_request_timeout: Optional[float] = None,
2201+
autoscaling_target_cpu_utilization: Optional[int] = None,
2202+
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
21902203
) -> Endpoint:
21912204
"""Deploys model to endpoint. Endpoint will be created if unspecified.
21922205
@@ -2271,6 +2284,13 @@ def _deploy(
22712284
be immediately returned and synced when the Future has completed.
22722285
deploy_request_timeout (float):
22732286
Optional. The timeout for the deploy request in seconds.
2287+
autoscaling_target_cpu_utilization (int):
2288+
Optional. Target CPU Utilization to use for Autoscaling Replicas.
2289+
A default value of 60 will be used if not specified.
2290+
autoscaling_target_accelerator_duty_cycle (int):
2291+
Optional. Target Accelerator Duty Cycle.
2292+
If specified, accelerator_type and accelerator_count must also be set.
2293+
A default value of 60 will be used if not specified.
22742294
Returns:
22752295
endpoint ("Endpoint"):
22762296
Endpoint with the deployed model.
@@ -2306,6 +2326,8 @@ def _deploy(
23062326
explanation_parameters=explanation_parameters,
23072327
metadata=metadata,
23082328
deploy_request_timeout=deploy_request_timeout,
2329+
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
2330+
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
23092331
)
23102332

23112333
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)

0 commit comments

Comments
 (0)