diff --git a/Makefile b/Makefile index 6f04a5831cce..03ea7d51ad86 100644 --- a/Makefile +++ b/Makefile @@ -145,7 +145,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all @@ -509,10 +509,6 @@ backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libt CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/ -backend-assets/grpc/gpt2: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./backend/go/llm/gpt2/ - backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/ @@ -529,10 +525,6 @@ backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/lib CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/ -backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/ - backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv diff --git a/backend/go/llm/gpt2/main.go b/backend/go/llm/gpt2/main.go deleted file mode 100644 index 972e39d385d0..000000000000 --- a/backend/go/llm/gpt2/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.GPT2{}); err != nil { - panic(err) - } -} diff --git a/backend/go/llm/starcoder/main.go b/backend/go/llm/starcoder/main.go deleted file mode 100644 index 38534d6c09d7..000000000000 --- a/backend/go/llm/starcoder/main.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers" - - grpc "github.com/go-skynet/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &transformers.Starcoder{}); err != nil { - panic(err) - } -} diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 98446e1dfd0f..7c3fe154bde2 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -16,18 +16,16 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | +| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal | | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | GPT | no | yes | N/A | | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | GPT | no | yes | N/A | | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | GPT | no | yes | N/A | | [falcon-ggml](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Falcon (*) | yes | GPT | no | no | N/A | -| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT2, Cerebras | yes | GPT | no | no | N/A | | [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | GPT | no | no | N/A | | [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | GPT | no | no | N/A | | [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | GPT | no | no | N/A | | [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | GPT | no | no | N/A | | [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | GPT | no | no | N/A | -| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Starcoder | yes | GPT | no | no | N/A| | [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | yes | GPT | no | no | N/A | | [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rwkv.cpp)) | rwkv | yes | GPT | no | yes | N/A | | [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp)) | bert | no | Embeddings only | yes | no | N/A | @@ -47,6 +45,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A | | `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `petals` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA | +| `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | no | CPU/CUDA | Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})). diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 8c5abdcc0f7b..df0aaf2f7a02 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -23,13 +23,11 @@ const ( GoLlamaBackend = "llama" LlamaGGML = "llama-ggml" LLamaCPP = "llama-cpp" - StarcoderBackend = "starcoder" GPTJBackend = "gptj" DollyBackend = "dolly" MPTBackend = "mpt" GPTNeoXBackend = "gptneox" ReplitBackend = "replit" - Gpt2Backend = "gpt2" Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" @@ -55,11 +53,9 @@ var AutoLoadBackends []string = []string{ GPTNeoXBackend, BertEmbeddingsBackend, GPTJBackend, - Gpt2Backend, DollyBackend, MPTBackend, ReplitBackend, - StarcoderBackend, RwkvBackend, WhisperBackend, StableDiffusionBackend,