You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# NOTE(review): parent keys (if any) sit outside the visible chunk; confirm
# the nesting level of this section against the consumer's schema.

# Needs to be lower case without special characters.
endpoint_name: "smollm-360m-instruct-v0-2-q8-lvy"
model: "HuggingFaceTB/SmolLM-360M-Instruct"
revision: "main"
# Can be any of "awq", "eetq", "gptq", "4bit" or "8bit" (will use
# bitsandbytes), "bfloat16" or "float16".
dtype: "default"
# If true, ignore all params in `instance`, and don't delete the endpoint
# after evaluation.
reuse_existing: true

instance:
  accelerator: "gpu"
  region: "eu-west-1"
  vendor: "aws"
  instance_size: "medium"
  instance_type: "g5.2xlarge"
  framework: "pytorch"
  endpoint_type: "protected"
  # The namespace under which to launch the endpoint. Defaults to the
  # current user's namespace.
  namespace: null
  # Optionally specify the docker image to use when launching the endpoint
  # model. E.g., launching models with later releases of the TGI container
  # with support for newer models.
  image_url: null
  # Optional environment variables to include when launching the endpoint,
  # e.g. `MAX_INPUT_LENGTH: 2048`.
  env_vars: null
0 commit comments