Commit 5322067: Merge branch 'master' into feat-musicgen-duration
Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler authored Aug 23, 2024
2 parents 62a9386 + ac5f6f2 · commit 5322067
Showing 28 changed files with 214 additions and 72 deletions.
3 changes: 2 additions & 1 deletion .devcontainer-scripts/utils.sh
@@ -35,8 +35,9 @@ config_remote() {
 #
 # Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
 setup_ssh() {
+    mkdir -p ~/.ssh
     local files=("$@")
-    for file in "${files[@]}"; then
+    for file in "${files[@]}" ; do
         local cfile="/devcontainer-customization/${file}"
         local hfile="~/.ssh/${file}"
         if [ ! -f "${hfile}" ]; then
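For reference, the repaired helper as a complete, runnable sketch. The final copy step is an assumption, since the hunk ends before the loop body does, and $HOME stands in for the quoted "~" (which the shell would not expand):

# Sketch of the corrected setup_ssh; the cp fallback is assumed (the hunk is truncated).
setup_ssh() {
    mkdir -p ~/.ssh
    local files=("$@")
    for file in "${files[@]}" ; do
        local cfile="/devcontainer-customization/${file}"
        # $HOME is used here: a tilde inside double quotes is taken literally.
        local hfile="$HOME/.ssh/${file}"
        if [ ! -f "${hfile}" ]; then
            # Assumed behavior: copy the customization file into ~/.ssh if missing.
            cp "${cfile}" "${hfile}"
        fi
    done
}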
4 changes: 2 additions & 2 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=fc54ef0d1c138133a01933296d50a36a1ab64735
+CPPLLAMA_VERSION?=3ba780e2a8f0ffe13f571b27f0bbf2ca5a199efc

 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
+WHISPER_CPP_VERSION?=9e3c5345cd46ea718209db53464e426c3fe7a25e

 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
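All of these pins use Make's conditional assignment (?=), so a different revision can be tested without editing the Makefile — a quick sketch, assuming the repository's standard build target:

# ?= only assigns when the variable is not already set, so an environment override wins:
CPPLLAMA_VERSION=3ba780e2a8f0ffe13f571b27f0bbf2ca5a199efc make build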
2 changes: 1 addition & 1 deletion backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
-grpcio==1.65.4
+grpcio==1.66.0
protobuf
certifi
transformers
2 changes: 1 addition & 1 deletion backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
bark==0.1.5
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
2 changes: 1 addition & 1 deletion backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
TTS==0.22.0
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
setuptools
-grpcio==1.65.4
+grpcio==1.66.0
pillow
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/exllama/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
setuptools
2 changes: 1 addition & 1 deletion backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.4
+grpcio==1.66.0
protobuf
certifi
wheel
2 changes: 1 addition & 1 deletion backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
librosa==0.9.1
faster-whisper==1.0.3
2 changes: 1 addition & 1 deletion backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
librosa
faster-whisper
2 changes: 1 addition & 1 deletion backend/python/parler-tts/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
llvmlite==0.43.0
2 changes: 1 addition & 1 deletion backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.4
+grpcio==1.66.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/sentencetransformers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
scipy==1.14.0
certifi
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
2 changes: 1 addition & 1 deletion backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.0
protobuf
certifi
setuptools
9 changes: 6 additions & 3 deletions core/http/endpoints/openai/chat.go
@@ -25,9 +25,8 @@ import (
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
textContentToReturn := ""
id := uuid.New().String()
created := int(time.Now().Unix())
var id, textContentToReturn string
var created int

process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
initialMessage := schema.OpenAIResponse{
@@ -159,6 +158,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}

	return func(c *fiber.Ctx) error {
+		textContentToReturn = ""
+		id = uuid.New().String()
+		created = int(time.Now().Unix())
+
		modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
		if err != nil {
			return fmt.Errorf("failed reading parameters from request:%w", err)
		}
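This moves the response id, created timestamp, and textContentToReturn from the endpoint closure (where they were computed once, at route registration) into the per-request handler, so each chat completion now gets a fresh id and timestamp instead of sharing state across requests. A quick way to observe it — a sketch assuming a LocalAI instance on localhost:8080 with some chat-capable model installed (the model name is a placeholder):

# Two successive requests should now return distinct "id" values; before the
# fix, every response from this endpoint carried the same id and created time.
for i in 1 2; do
  curl -s http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "my-chat-model", "messages": [{"role": "user", "content": "hello"}]}' \
    | grep -o '"id":"[^"]*"'
done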
2 changes: 1 addition & 1 deletion docs/data/version.json
@@ -1,3 +1,3 @@
{
"version": "v2.19.4"
"version": "v2.20.1"
}
91 changes: 91 additions & 0 deletions gallery/hermes-vllm.yaml
@@ -0,0 +1,91 @@
---
name: "hermes-vllm"

config_file: |
  backend: vllm
  context_size: 8192
  stopwords:
  - "<|im_end|>"
  - "<dummy32000>"
  - "<|eot_id|>"
  - "<|end_of_text|>"
  function:
    disable_no_action: true
    grammar:
      # Uncomment the line below to enable grammar matching for JSON results if the model is breaking
      # the output. This will make the model more accurate and won't break the JSON output.
      # This however, will make parallel_calls not functional (it is a known bug)
      # mixed_mode: true
      disable: true
      parallel_calls: true
      expect_strings_after_json: true
    json_regex_match:
    - "(?s)<tool_call>(.*?)</tool_call>"
    - "(?s)<tool_call>(.*)"
    capture_llm_results:
    - (?s)<scratchpad>(.*?)</scratchpad>
    replace_llm_results:
    - key: (?s)<scratchpad>(.*?)</scratchpad>
      value: ""
  template:
    use_tokenizer_template: true
    chat: |
      {{.Input -}}
      <|im_start|>assistant
    chat_message: |
      <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
      {{- if .FunctionCall }}
      <tool_call>
      {{- else if eq .RoleName "tool" }}
      <tool_response>
      {{- end }}
      {{- if .Content}}
      {{.Content }}
      {{- end }}
      {{- if .FunctionCall}}
      {{toJson .FunctionCall}}
      {{- end }}
      {{- if .FunctionCall }}
      </tool_call>
      {{- else if eq .RoleName "tool" }}
      </tool_response>
      {{- end }}<|im_end|>
    completion: |
      {{.Input}}
    function: |
      <|im_start|>system
      You are a function calling AI model.
      Here are the available tools:
      <tools>
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      </tools>
      You should call the tools provided to you sequentially
      Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
      <scratchpad>
      {step-by-step reasoning and plan in bullet points}
      </scratchpad>
      For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
      <tool_call>
      {"arguments": <args-dict>, "name": <function-name>}
      </tool_call><|im_end|>
      {{.Input -}}
      <|im_start|>assistant
  # Uncomment to specify a quantization method (optional)
  # quantization: "awq"
  # Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
  # gpu_memory_utilization: 0.5
  # Uncomment to trust remote code from huggingface
  # trust_remote_code: true
  # Uncomment to enable eager execution
  # enforce_eager: true
  # Uncomment to specify the size of the CPU swap space per GPU (in GiB)
  # swap_space: 2
  # Uncomment to specify the maximum length of a sequence (including prompt and output)
  # max_model_len: 32768
  # Uncomment and specify the number of Tensor divisions.
  # Allows you to partition and run large models. Performance gains are limited.
  # https://github.com/vllm-project/vllm/issues/1435
  # tensor_parallel_size: 2
32 changes: 32 additions & 0 deletions gallery/index.yaml
@@ -4752,6 +4752,38 @@
    - filename: Hermes-3-Llama-3.1-70B.Q4_K_M.gguf
      sha256: 955c2f42caade4278f3c9dbffa32bb74572652b20e49e5340e782de3585bbe3f
      uri: huggingface://NousResearch/Hermes-3-Llama-3.1-70B-GGUF/Hermes-3-Llama-3.1-70B.Q4_K_M.gguf
+- &hermes-vllm
+  url: "github:mudler/LocalAI/gallery/hermes-vllm.yaml@master"
+  name: "hermes-3-llama-3.1-8b:vllm"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg
+  tags:
+  - llm
+  - vllm
+  - gpu
+  - function-calling
+  license: llama-3
+  urls:
+  - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B
+  description: |
+    Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. It is designed to focus on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The model uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue. It also supports function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.
+  overrides:
+    parameters:
+      model: NousResearch/Hermes-3-Llama-3.1-8B
+- !!merge <<: *hermes-vllm
+  name: "hermes-3-llama-3.1-70b:vllm"
+  urls:
+  - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B
+  overrides:
+    parameters:
+      model: NousResearch/Hermes-3-Llama-3.1-70B
+- !!merge <<: *hermes-vllm
+  name: "hermes-3-llama-3.1-405b:vllm"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/-kj_KflXsdpcZoTQsvx7W.jpeg
+  urls:
+  - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B
+  overrides:
+    parameters:
+      model: NousResearch/Hermes-3-Llama-3.1-405B
- !!merge <<: *hermes-2-pro-mistral
  name: "biomistral-7b"
  description: |
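With these index entries in place, the 8B config can be pulled through LocalAI's model-gallery API — a sketch, assuming a running instance on localhost:8080 and that the entry lives in the default gallery (the "localai@" prefix is an assumption):

# Ask a running LocalAI instance to install the gallery model, then verify it is listed:
curl http://localhost:8080/models/apply \
  -H "Content-Type: application/json" \
  -d '{"id": "localai@hermes-3-llama-3.1-8b:vllm"}'
curl http://localhost:8080/v1/models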
29 changes: 29 additions & 0 deletions gallery/vllm.yaml
@@ -0,0 +1,29 @@
---
name: "vllm"

config_file: |
  backend: vllm
  function:
    disable_no_action: true
    grammar:
      disable: true
      parallel_calls: true
      expect_strings_after_json: true
  template:
    use_tokenizer_template: true
  # Uncomment to specify a quantization method (optional)
  # quantization: "awq"
  # Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
  # gpu_memory_utilization: 0.5
  # Uncomment to trust remote code from huggingface
  # trust_remote_code: true
  # Uncomment to enable eager execution
  # enforce_eager: true
  # Uncomment to specify the size of the CPU swap space per GPU (in GiB)
  # swap_space: 2
  # Uncomment to specify the maximum length of a sequence (including prompt and output)
  # max_model_len: 32768
  # Uncomment and specify the number of Tensor divisions.
  # Allows you to partition and run large models. Performance gains are limited.
  # https://github.com/vllm-project/vllm/issues/1435
  # tensor_parallel_size: 2
13 changes: 7 additions & 6 deletions go.mod
@@ -33,8 +33,8 @@ require (
	github.com/libp2p/go-libp2p v0.36.2
	github.com/mholt/archiver/v3 v3.5.1
	github.com/microcosm-cc/bluemonday v1.0.26
	github.com/mudler/edgevpn v0.27.3
-	github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c
+	github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82
	github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f
	github.com/onsi/ginkgo/v2 v2.20.0
	github.com/onsi/gomega v1.34.1
@@ -84,6 +84,7 @@ require (
	github.com/pion/transport/v2 v2.2.10 // indirect
	github.com/pion/turn/v2 v2.1.6 // indirect
	github.com/pion/webrtc/v3 v3.3.0 // indirect
+	github.com/shirou/gopsutil/v4 v4.24.7 // indirect
	github.com/wlynxg/anet v0.0.4 // indirect
	go.uber.org/mock v0.4.0 // indirect
)
@@ -132,7 +133,7 @@ require (
	github.com/go-audio/riff v1.0.0 // indirect
	github.com/go-logr/logr v1.4.2 // indirect
	github.com/go-logr/stdr v1.2.2 // indirect
-	github.com/go-ole/go-ole v1.2.6 // indirect
+	github.com/go-ole/go-ole v1.3.0 // indirect
	github.com/go-openapi/jsonpointer v0.21.0 // indirect
	github.com/go-openapi/jsonreference v0.21.0 // indirect
	github.com/go-openapi/spec v0.21.0 // indirect
@@ -188,7 +189,7 @@ require (
	github.com/libp2p/go-yamux/v4 v4.0.1 // indirect
	github.com/libp2p/zeroconf/v2 v2.2.0 // indirect
	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
-	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
+	github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 // indirect
	github.com/mailru/easyjson v0.7.7 // indirect
	github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd // indirect
	github.com/mattn/go-colorable v0.1.13 // indirect
@@ -234,7 +235,7 @@ require (
	github.com/pkoukk/tiktoken-go v0.1.6 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/polydawn/refmt v0.89.0 // indirect
-	github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
+	github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
	github.com/prometheus/client_model v0.6.1 // indirect
	github.com/prometheus/common v0.55.0 // indirect
	github.com/prometheus/procfs v0.15.1 // indirect
@@ -252,8 +253,8 @@
	github.com/spf13/cast v1.5.0 // indirect
	github.com/swaggo/files/v2 v2.0.0 // indirect
	github.com/tinylib/msgp v1.1.8 // indirect
-	github.com/tklauser/go-sysconf v0.3.12 // indirect
-	github.com/tklauser/numcpus v0.6.1 // indirect
+	github.com/tklauser/go-sysconf v0.3.14 // indirect
+	github.com/tklauser/numcpus v0.8.0 // indirect
	github.com/ulikunitz/xz v0.5.9 // indirect
	github.com/valyala/bytebufferpool v1.0.0 // indirect
	github.com/valyala/tcplisten v1.0.0 // indirect