
Commit

Add scale_to_zero_timeout parameter to HFApi.create/update_inference_endpoint (#2463)

* Add scale_to_zero_timeout parameter to HFApi.create/update_inference_endpoint

* add ending commas for style

* add make style changes
hommayushi3 authored Aug 20, 2024
1 parent 1b9517d commit 0231357
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/huggingface_hub/hf_api.py
@@ -7408,6 +7408,7 @@ def create_inference_endpoint(
         account_id: Optional[str] = None,
         min_replica: int = 0,
         max_replica: int = 1,
+        scale_to_zero_timeout: int = 15,
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
@@ -7440,6 +7441,8 @@ def create_inference_endpoint(
                 The minimum number of replicas (instances) to keep running for the Inference Endpoint. Defaults to 0.
             max_replica (`int`, *optional*):
                 The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to 1.
+            scale_to_zero_timeout (`int`, *optional*):
+                The duration in minutes before an inactive endpoint is scaled to zero. Defaults to 15.
             revision (`str`, *optional*):
                 The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
             task (`str`, *optional*):
@@ -7525,6 +7528,7 @@ def create_inference_endpoint(
                 "scaling": {
                     "maxReplica": max_replica,
                     "minReplica": min_replica,
+                    "scaleToZeroTimeout": scale_to_zero_timeout,
                 },
             },
             "model": {
@@ -7608,6 +7612,7 @@ def update_inference_endpoint(
         instance_type: Optional[str] = None,
         min_replica: Optional[int] = None,
         max_replica: Optional[int] = None,
+        scale_to_zero_timeout: Optional[int] = None,
         # Model update
         repository: Optional[str] = None,
         framework: Optional[str] = None,
@@ -7639,6 +7644,8 @@ def update_inference_endpoint(
                 The minimum number of replicas (instances) to keep running for the Inference Endpoint.
             max_replica (`int`, *optional*):
                 The maximum number of replicas (instances) to scale to for the Inference Endpoint.
+            scale_to_zero_timeout (`int`, *optional*):
+                The duration in minutes before an inactive endpoint is scaled to zero.
             repository (`str`, *optional*):
                 The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
@@ -7666,14 +7673,18 @@ def update_inference_endpoint(
         namespace = namespace or self._get_namespace(token=token)

         payload: Dict = {}
-        if any(value is not None for value in (accelerator, instance_size, instance_type, min_replica, max_replica)):
+        if any(
+            value is not None
+            for value in (accelerator, instance_size, instance_type, min_replica, max_replica, scale_to_zero_timeout)
+        ):
             payload["compute"] = {
                 "accelerator": accelerator,
                 "instanceSize": instance_size,
                 "instanceType": instance_type,
                 "scaling": {
                     "maxReplica": max_replica,
                     "minReplica": min_replica,
+                    "scaleToZeroTimeout": scale_to_zero_timeout,
                 },
             }
         if any(value is not None for value in (repository, framework, revision, task, custom_image)):
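
With the relaxed `if any(...)` check above, the timeout can also be updated on its own. A minimal sketch, where the endpoint name is illustrative:

    from huggingface_hub import HfApi

    api = HfApi()

    # Passing only scale_to_zero_timeout is now enough to trigger the
    # "compute" update payload on an existing endpoint.
    endpoint = api.update_inference_endpoint(
        "my-endpoint",             # illustrative name of an existing endpoint
        scale_to_zero_timeout=60,  # scale to zero after 60 minutes of inactivity
    )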
