77 changes: 75 additions & 2 deletions packages/tasks/src/local-apps.ts
@@ -58,11 +58,30 @@ export type LocalApp = {
}
);

-// eslint-disable-next-line @typescript-eslint/no-unused-vars
-function isGgufModel(model: ModelData) {
+function isGgufModel(model: ModelData): boolean {
return model.tags.includes("gguf");
}

function isAwqModel(model: ModelData): boolean {
return model.config?.quantization_config?.quant_method === "awq";
}

function isGptqModel(model: ModelData): boolean {
return model.config?.quantization_config?.quant_method === "gptq";
}

function isAqlmModel(model: ModelData): boolean {
return model.config?.quantization_config?.quant_method === "aqlm";
}

function isMarlinModel(model: ModelData): boolean {
return model.config?.quantization_config?.quant_method === "marlin";
}

function isTransformersModel(model: ModelData): boolean {
return model.tags.includes("transformers");
}

function isLlamaCppGgufModel(model: ModelData) {
return !!model.gguf?.context_length;
}
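// Illustrative only (hypothetical model data, not part of this diff): how the
// detectors above classify a GPTQ-quantized repo via its config.json.
//
// const example = {
// 	id: "org/some-model-GPTQ",
// 	tags: ["transformers"],
// 	config: { quantization_config: { quant_method: "gptq" } },
// } as ModelData;
// isGptqModel(example); // true — quant_method matches "gptq"
// isGgufModel(example); // false — no "gguf" tag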
@@ -127,6 +146,47 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
];
};

const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
const runCommand = [
"",
"# Call the server using curl:",
	`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
	`	-H "Content-Type: application/json" \\`,
	`	--data '{`,
	`		"model": "${model.id}",`,
	`		"messages": [`,
	`			{"role": "user", "content": "Hello!"}`,
	`		]`,
	`	}'`,
];
return [
{
title: "Install from pip",
setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
},
{
title: "Use Docker images",
setup: [
"# Deploy with docker on Linux:",
`docker run --runtime nvidia --gpus all \\`,
` --name my_vllm_container \\`,
` -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
` --env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
` -p 8000:8000 \\`,
` --ipc=host \\`,
` vllm/vllm-openai:latest \\`,
` --model ${model.id}`,
].join("\n"),
content: [
"# Load and run the model:",
`docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
...runCommand,
].join("\n"),
},
];
};
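// Sketch of the rendered "Install from pip" output for a hypothetical model id
// "org/model" (id is illustrative, not from this PR; assumes vLLM's default
// port 8000):
//
// # Install vLLM from pip:
// pip install vllm
// # Load and run the model:
// vllm serve "org/model"
// # Call the server using curl:
// curl -X POST "http://localhost:8000/v1/chat/completions" \
//   -H "Content-Type: application/json" \
//   --data '{ "model": "org/model", "messages": [{"role": "user", "content": "Hello!"}] }'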

/**
* Add your new local app here.
*
@@ -146,6 +206,19 @@ export const LOCAL_APPS = {
displayOnModelPage: isLlamaCppGgufModel,
snippet: snippetLlamacpp,
},
vllm: {
prettyLabel: "vLLM",
docsUrl: "https://docs.vllm.ai",
mainTask: "text-generation",
displayOnModelPage: (model: ModelData) =>
isAwqModel(model) ||
isGptqModel(model) ||
isAqlmModel(model) ||
isMarlinModel(model) ||
isGgufModel(model) ||
isTransformersModel(model),
snippet: snippetVllm,
},
lmstudio: {
prettyLabel: "LM Studio",
docsUrl: "https://lmstudio.ai",
4 changes: 4 additions & 0 deletions packages/tasks/src/model-data.ts
@@ -38,6 +38,10 @@ export interface ModelData {
bits?: number;
load_in_4bit?: boolean;
load_in_8bit?: boolean;
/**
* awq, gptq, aqlm, marlin, … Used by vLLM
*/
quant_method?: string;
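// For illustration (hypothetical values): a config.json containing
//   "quantization_config": { "quant_method": "awq", "bits": 4 }
// is exposed here as config.quantization_config.quant_method === "awq".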
};
tokenizer_config?: TokenizerConfig;
adapter_transformers?: {