From 3d83128f169de3676b341245b985af2e50da9c0f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 16 Dec 2023 12:22:45 -0500
Subject: [PATCH] feat(alias): alias llama to llama-cpp, update docs (#1448)

Signed-off-by: Ettore Di Giacinto
---
 docs/content/model-compatibility/_index.en.md |  2 ++
 docs/content/model-compatibility/llama-cpp.md |  6 +++---
 pkg/model/initializers.go                     | 11 ++++++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/docs/content/model-compatibility/_index.en.md b/docs/content/model-compatibility/_index.en.md
index a45d36eeeed9..9f95d4e6506e 100644
--- a/docs/content/model-compatibility/_index.en.md
+++ b/docs/content/model-compatibility/_index.en.md
@@ -50,6 +50,8 @@ Besides llama based models, LocalAI is compatible also with other architectures
 | `diffusers` | SD,... | no | Image generation | no | no | N/A |
 | `vall-e-x` | Vall-E | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
 | `vllm` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA |
+| `exllama2` | GPTQ | yes | GPT only | no | no | N/A |
+| `transformers-musicgen` | | no | Audio generation | no | no | N/A |
 
 Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "advanced" %}})).
 
diff --git a/docs/content/model-compatibility/llama-cpp.md b/docs/content/model-compatibility/llama-cpp.md
index 6fcd498f9311..ccea3b952813 100644
--- a/docs/content/model-compatibility/llama-cpp.md
+++ b/docs/content/model-compatibility/llama-cpp.md
@@ -9,7 +9,7 @@ weight = 1
 
 {{% notice note %}}
 
-The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify, use the `llama-stable` backend instead. If you are relying in automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend.
+The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, use the `llama-ggml` backend instead. If you are relying on automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The go backend is deprecated as well, but it is still available as `go-llama`; it still supports features not available in the mainline backend: speculative sampling and embeddings.
 {{% /notice %}}
 
@@ -65,11 +65,11 @@ parameters:
 
 In the example above we specify `llama` as the backend to restrict loading `gguf` models only.
 
-For instance, to use the `llama-stable` backend for `ggml` models:
+For instance, to use the `llama-ggml` backend for `ggml` models:
 
 ```yaml
 name: llama
-backend: llama-stable
+backend: llama-ggml
 parameters:
   # Relative to the models path
   model: file.ggml.bin
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 48ad922fcbfb..796dc5aeb51e 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -14,6 +14,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+var Aliases map[string]string = map[string]string{
+	"go-llama": GoLlamaBackend,
+	"llama":    LLamaCPP,
+}
+
 const (
 	GoLlamaBackend = "llama"
 	LlamaGGML      = "llama-ggml"
@@ -169,9 +174,13 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
 func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
 	o := NewOptions(opts...)
 
-	log.Debug().Msgf("Loading model %s from %s", o.backendString, o.model)
+	log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
 
 	backend := strings.ToLower(o.backendString)
+	if realBackend, exists := Aliases[backend]; exists {
+		log.Debug().Msgf("%s is an alias of %s", backend, realBackend)
+		backend = realBackend
+	}
 
 	if o.singleActiveBackend {
 		ml.mu.Lock()
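
The effect of the new `Aliases` table is that user-facing backend names are rewritten to their registered counterparts before `BackendLoader` resolves a model: `llama` now loads the gguf-capable `llama-cpp` backend, while `go-llama` keeps pointing at the deprecated go binding, which is registered internally as `llama`. The following standalone sketch illustrates that lookup under stated assumptions; `resolveBackend` and the lowercase `aliases` map are illustrative names only, not part of the LocalAI API (the real code lives in `pkg/model/initializers.go`, uses the exported `Aliases` map shown in the hunk above, and logs through zerolog):

```go
package main

import (
	"fmt"
	"strings"
)

// aliases mirrors the table introduced by this patch: the key is the name a
// user may put in the model YAML, the value is the backend that actually runs.
var aliases = map[string]string{
	"go-llama": "llama",     // the deprecated go binding is registered internally as "llama"
	"llama":    "llama-cpp", // "llama" now points at the gguf-capable llama.cpp backend
}

// resolveBackend reproduces the lookup added to BackendLoader: normalize the
// requested name, then substitute the alias target if one exists.
func resolveBackend(requested string) string {
	backend := strings.ToLower(requested)
	if target, exists := aliases[backend]; exists {
		fmt.Printf("%s is an alias of %s\n", backend, target)
		return target
	}
	return backend
}

func main() {
	fmt.Println(resolveBackend("llama"))    // llama-cpp
	fmt.Println(resolveBackend("go-llama")) // llama
	fmt.Println(resolveBackend("vllm"))     // vllm (no alias, passed through unchanged)
}
```

With this in place, a model configuration that declares `backend: llama` is transparently served by the `llama-cpp` backend, and existing configurations that reference `go-llama` continue to work without changes.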