-
Notifications
You must be signed in to change notification settings - Fork 354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: support autoscaling metrics when deploying models #1197
Changes from 2 commits
03d7885
4bb666b
9501eb0
7a601c7
b278298
c82ba65
e14d1ae
fbceb92
0f2353e
8a28d7a
779b805
12ad857
4acd36d
1f19010
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -644,6 +644,8 @@ def deploy( | |
metadata: Optional[Sequence[Tuple[str, str]]] = (), | ||
sync=True, | ||
deploy_request_timeout: Optional[float] = None, | ||
autoscaling_target_cpu_utilization: Optional[int] = None, | ||
autoscaling_target_accelerator_duty_cycle: Optional[int] = None, | ||
) -> None: | ||
"""Deploys a Model to the Endpoint. | ||
|
||
|
@@ -717,6 +719,13 @@ def deploy( | |
be immediately returned and synced when the Future has completed. | ||
deploy_request_timeout (float): | ||
Optional. The timeout for the deploy request in seconds. | ||
autoscaling_target_cpu_utilization (int): | ||
Target CPU Utilization to use for Autoscaling Replicas. | ||
A default value of 60 will be used if not specified. | ||
autoscaling_target_accelerator_duty_cycle (int): | ||
Target Accelerator Duty Cycle. | ||
Must also set accelerator_type and accelerator_count if specified. | ||
A default value of 60 will be used if not specified. | ||
""" | ||
self._sync_gca_resource_if_skipped() | ||
|
||
|
@@ -747,6 +756,8 @@ def deploy( | |
metadata=metadata, | ||
sync=sync, | ||
deploy_request_timeout=deploy_request_timeout, | ||
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, | ||
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle | ||
) | ||
|
||
@base.optional_sync() | ||
|
@@ -767,6 +778,8 @@ def _deploy( | |
metadata: Optional[Sequence[Tuple[str, str]]] = (), | ||
sync=True, | ||
deploy_request_timeout: Optional[float] = None, | ||
autoscaling_target_cpu_utilization: Optional[int] = None, | ||
autoscaling_target_accelerator_duty_cycle: Optional[int] = None, | ||
) -> None: | ||
"""Deploys a Model to the Endpoint. | ||
|
||
|
@@ -840,6 +853,13 @@ def _deploy( | |
be immediately returned and synced when the Future has completed. | ||
deploy_request_timeout (float): | ||
Optional. The timeout for the deploy request in seconds. | ||
autoscaling_target_cpu_utilization (int): | ||
Target CPU Utilization to use for Autoscaling Replicas. | ||
A default value of 60 will be used if not specified. | ||
autoscaling_target_accelerator_duty_cycle (int): | ||
Target Accelerator Duty Cycle. | ||
Must also set accelerator_type and accelerator_count if specified. | ||
A default value of 60 will be used if not specified. | ||
Raises: | ||
ValueError: If there is no current traffic split and traffic percentage | ||
is not 0 or 100. | ||
|
@@ -866,6 +886,8 @@ def _deploy( | |
explanation_parameters=explanation_parameters, | ||
metadata=metadata, | ||
deploy_request_timeout=deploy_request_timeout, | ||
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, | ||
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle | ||
) | ||
|
||
_LOGGER.log_action_completed_against_resource("model", "deployed", self) | ||
|
@@ -892,6 +914,8 @@ def _deploy_call( | |
explanation_parameters: Optional[explain.ExplanationParameters] = None, | ||
metadata: Optional[Sequence[Tuple[str, str]]] = (), | ||
deploy_request_timeout: Optional[float] = None, | ||
autoscaling_target_cpu_utilization: Optional[int] = None, | ||
autoscaling_target_accelerator_duty_cycle: Optional[int] = None, | ||
): | ||
"""Helper method to deploy model to endpoint. | ||
|
||
|
@@ -965,6 +989,13 @@ def _deploy_call( | |
be immediately returned and synced when the Future has completed. | ||
deploy_request_timeout (float): | ||
Optional. The timeout for the deploy request in seconds. | ||
autoscaling_target_cpu_utilization (int): | ||
Target CPU Utilization to use for Autoscaling Replicas. | ||
A default value of 60 will be used if not specified. | ||
autoscaling_target_accelerator_duty_cycle (int): | ||
Target Accelerator Duty Cycle. | ||
Must also set accelerator_type and accelerator_count if specified. | ||
A default value of 60 will be used if not specified. | ||
Raises: | ||
ValueError: If there is no current traffic split and traffic percentage | ||
is not 0 or 100. | ||
|
@@ -980,6 +1011,12 @@ def _deploy_call( | |
"Both `accelerator_type` and `accelerator_count` should be specified or None." | ||
) | ||
|
||
if autoscaling_target_accelerator_duty_cycle and (not accelerator_type or not accelerator_count): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this check is working as expected (I tried deploying a model with only
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 9501eb0 |
||
raise ValueError( | ||
"Both `accelerator_type` and `accelerator_count` should be set " | ||
"when specifying autoscaling_target_accelerator_duty_cycle`" | ||
) | ||
|
||
deployed_model = gca_endpoint_compat.DeployedModel( | ||
model=model.resource_name, | ||
display_name=deployed_model_display_name, | ||
|
@@ -995,7 +1032,8 @@ def _deploy_call( | |
in model.supported_deployment_resources_types | ||
) | ||
provided_custom_machine_spec = ( | ||
machine_type or accelerator_type or accelerator_count | ||
machine_type or accelerator_type or accelerator_count or | ||
autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization | ||
) | ||
|
||
# If the model supports both automatic and dedicated deployment resources, | ||
|
@@ -1015,22 +1053,37 @@ def _deploy_call( | |
_LOGGER.info(f"Using default machine_type: {machine_type}") | ||
|
||
if use_dedicated_resources: | ||
|
||
dedicated_resources = gca_machine_resources_compat.DedicatedResources( | ||
min_replica_count=min_replica_count, | ||
max_replica_count=max_replica_count, | ||
) | ||
|
||
machine_spec = gca_machine_resources_compat.MachineSpec( | ||
machine_type=machine_type | ||
) | ||
|
||
if autoscaling_target_cpu_utilization: | ||
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( | ||
metric_name="aiplatform.googleapis.com/prediction/online/cpu/utilization", | ||
target=autoscaling_target_cpu_utilization | ||
) | ||
dedicated_resources.autoscaling_metric_specs.extend([autoscaling_metric_spec]) | ||
|
||
if accelerator_type and accelerator_count: | ||
utils.validate_accelerator_type(accelerator_type) | ||
machine_spec.accelerator_type = accelerator_type | ||
machine_spec.accelerator_count = accelerator_count | ||
|
||
deployed_model.dedicated_resources = ( | ||
gca_machine_resources_compat.DedicatedResources( | ||
machine_spec=machine_spec, | ||
min_replica_count=min_replica_count, | ||
max_replica_count=max_replica_count, | ||
) | ||
) | ||
if autoscaling_target_accelerator_duty_cycle: | ||
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( | ||
metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle", | ||
target=autoscaling_target_accelerator_duty_cycle | ||
) | ||
dedicated_resources.autoscaling_metric_specs.extend([autoscaling_metric_spec]) | ||
|
||
dedicated_resources.machine_spec = machine_spec | ||
deployed_model.dedicated_resources = dedicated_resources | ||
|
||
elif supports_automatic_resources: | ||
deployed_model.automatic_resources = ( | ||
|
@@ -1995,6 +2048,8 @@ def deploy( | |
encryption_spec_key_name: Optional[str] = None, | ||
sync=True, | ||
deploy_request_timeout: Optional[float] = None, | ||
autoscaling_target_cpu_utilization: Optional[int] = None, | ||
autoscaling_target_accelerator_duty_cycle: Optional[int] = None, | ||
) -> Endpoint: | ||
"""Deploys model to endpoint. Endpoint will be created if unspecified. | ||
|
||
|
@@ -2079,6 +2134,13 @@ def deploy( | |
be immediately returned and synced when the Future has completed. | ||
deploy_request_timeout (float): | ||
Optional. The timeout for the deploy request in seconds. | ||
autoscaling_target_cpu_utilization (int): | ||
Target CPU Utilization to use for Autoscaling Replicas. | ||
A default value of 60 will be used if not specified. | ||
autoscaling_target_accelerator_duty_cycle (int): | ||
Target Accelerator Duty Cycle. | ||
Must also set accelerator_type and accelerator_count if specified. | ||
A default value of 60 will be used if not specified. | ||
Returns: | ||
endpoint ("Endpoint"): | ||
Endpoint with the deployed model. | ||
|
@@ -2113,6 +2175,8 @@ def deploy( | |
or initializer.global_config.encryption_spec_key_name, | ||
sync=sync, | ||
deploy_request_timeout=deploy_request_timeout, | ||
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, | ||
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, | ||
) | ||
|
||
@base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False) | ||
|
@@ -2134,6 +2198,8 @@ def _deploy( | |
encryption_spec_key_name: Optional[str] = None, | ||
sync: bool = True, | ||
deploy_request_timeout: Optional[float] = None, | ||
autoscaling_target_cpu_utilization: Optional[int] = None, | ||
autoscaling_target_accelerator_duty_cycle: Optional[int] = None, | ||
) -> Endpoint: | ||
"""Deploys model to endpoint. Endpoint will be created if unspecified. | ||
|
||
|
@@ -2218,6 +2284,13 @@ def _deploy( | |
be immediately returned and synced when the Future has completed. | ||
deploy_request_timeout (float): | ||
Optional. The timeout for the deploy request in seconds. | ||
autoscaling_target_cpu_utilization (int): | ||
Target CPU Utilization to use for Autoscaling Replicas. | ||
A default value of 60 will be used if not specified. | ||
autoscaling_target_accelerator_duty_cycle (int): | ||
Target Accelerator Duty Cycle. | ||
Must also set accelerator_type and accelerator_count if specified. | ||
A default value of 60 will be used if not specified. | ||
Returns: | ||
endpoint ("Endpoint"): | ||
Endpoint with the deployed model. | ||
|
@@ -2253,6 +2326,8 @@ def _deploy( | |
explanation_parameters=explanation_parameters, | ||
metadata=metadata, | ||
deploy_request_timeout=deploy_request_timeout, | ||
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, | ||
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, | ||
) | ||
|
||
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add "Optional" to the beginning of this docstring? Same for
autoscaling_target_accelerator_duty_cycle
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 7a601c7