
Commit

Merge branch 'master' into fix-pr-folder-tasks
mudler authored Oct 1, 2024
2 parents 3053f77 + 1392093 commit 171c55d
Showing 98 changed files with 1,633 additions and 663 deletions.
2 changes: 2 additions & 0 deletions .devcontainer-scripts/utils.sh
@@ -9,6 +9,7 @@
# Param 2: email
#
config_user() {
echo "Configuring git for $1 <$2>"
local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then
echo "Setting up git user / remote"
@@ -24,6 +25,7 @@ config_user() {
# Param 2: remote url
#
config_remote() {
echo "Adding git remote and fetching $2 as $1"
local gr=$(git remote -v | grep $1)
if [ -z "${gr}" ]; then
git remote add $1 $2
11 changes: 8 additions & 3 deletions .github/check_and_update.py
@@ -29,9 +29,14 @@ def calculate_sha256(file_path):
def manual_safety_check_hf(repo_id):
    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
    scan = scanResponse.json()
-    if scan['hasUnsafeFile']:
-        return scan
-    return None
+    # Check if 'hasUnsafeFile' exists in the response
+    if 'hasUnsafeFile' in scan:
+        if scan['hasUnsafeFile']:
+            return scan
+        else:
+            return None
+    else:
+        return None

download_type, repo_id_or_url = parse_uri(uri)

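Note: the new branching guards against scan responses that omit the 'hasUnsafeFile' key, which would previously have raised a KeyError. As a minimal sketch (not part of the diff), the same guard collapses to one line with dict.get; names mirror the function above:

    import requests

    def manual_safety_check_hf(repo_id):
        # Query the Hugging Face scan endpoint for the given repository.
        scan = requests.get('https://huggingface.co/api/models/' + repo_id + '/scan').json()
        # get() returns None for a missing key, so this covers both the
        # absent-key case and the hasUnsafeFile == False case.
        return scan if scan.get('hasUnsafeFile') else None
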
117 changes: 73 additions & 44 deletions .github/workflows/image.yml
@@ -13,6 +13,78 @@ concurrency:
cancel-in-progress: true

jobs:
+  hipblas-jobs:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
+      aio: ${{ matrix.aio }}
+      makeflags: ${{ matrix.makeflags }}
+      latest-image: ${{ matrix.latest-image }}
+      latest-image-aio: ${{ matrix.latest-image-aio }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      # Pushing with all jobs in parallel
+      # eats the bandwidth of all the nodes
+      max-parallel: 2
+      matrix:
+        include:
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-hipblas-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            aio: "-aio-gpu-hipblas"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            latest-image: 'latest-gpu-hipblas'
+            latest-image-aio: 'latest-aio-gpu-hipblas'
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas'
+            ffmpeg: 'false'
+            image-type: 'extras'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas-core'
+            ffmpeg: 'false'
+            image-type: 'core'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs:
uses: ./.github/workflows/image_build.yml
with:
@@ -39,7 +111,7 @@ jobs:
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
matrix:
include:
# Extra images
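
A note on the max-parallel expression above: GitHub Actions expressions have no ternary operator, so the a && b || c idiom stands in for one (safe here because 5 is truthy). Non-pull-request events get 5 parallel jobs and pull requests get 8, presumably because push builds upload images to the registries while pull-request builds only compile. A rough Python analogue of the evaluation, with an illustrative function name:

    def max_parallel(event_name: str) -> int:
        # Mirrors ${{ github.event_name != 'pull_request' && 5 || 8 }}:
        # a && b || c evaluates to b when a is truthy, otherwise c.
        return 5 if event_name != 'pull_request' else 8
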
@@ -122,29 +194,6 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-hipblas-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            aio: "-aio-gpu-hipblas"
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            latest-image: 'latest-gpu-hipblas'
-            latest-image-aio: 'latest-aio-gpu-hipblas'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
@@ -212,26 +261,6 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"

core-image-build:
uses: ./.github/workflows/image_build.yml
2 changes: 1 addition & 1 deletion .github/workflows/secscan.yaml
@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.21.0
+        uses: securego/gosec@v2.21.4
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
11 changes: 10 additions & 1 deletion .github/workflows/test.yml
@@ -189,13 +189,22 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
+      - name: Dependencies
+        run: |
+          # Install protoc
+          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+          rm protoc.zip
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images
run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test
run: |
-          LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
+          PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
13 changes: 6 additions & 7 deletions Dockerfile
@@ -297,10 +297,10 @@ COPY .git .
RUN make prepare

## Build the binary
-## If it's CUDA, we want to skip some of the llama-compat backends to save space
-## We only leave the most CPU-optimized variant and the fallback for the cublas build
-## (both will use CUDA for the actual computation)
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
+## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
+## (both will use CUDA or hipblas for the actual computation)
+RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
@@ -338,9 +338,8 @@ RUN if [ "${FFMPEG}" = "true" ]; then \

RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ssh less wget
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.

RUN go install github.com/go-delve/delve/cmd/dlv@latest

9 changes: 6 additions & 3 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=6262d13e0b2da91f230129a93a996609a2f5a2f2
+CPPLLAMA_VERSION?=6f1d9d71f4c568778a7637ff6582e6f6ba5fb9d3

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=049b3a0e53c8a8e4c4576c06a1a4fccf0063a73f
+WHISPER_CPP_VERSION?=8feb375fbdf0277ad36958c218c6bf48fa0ba75a

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -359,6 +359,9 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets

+clean-dc: clean
+	cp -r /build/backend-assets /workspace/backend-assets

## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
@@ -465,7 +468,7 @@ run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

-run-e2e-aio:
+run-e2e-aio: protogen-go
@echo 'Running e2e AIO tests'
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio

2 changes: 1 addition & 1 deletion aio/cpu/vision.yaml
@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
-name: gpt-4-vision-preview
+name: gpt-4o

roles:
user: "USER:"
2 changes: 1 addition & 1 deletion aio/gpu-8g/vision.yaml
@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
f16: true
mmap: true
-name: gpt-4-vision-preview
+name: gpt-4o

roles:
user: "USER:"
2 changes: 1 addition & 1 deletion aio/intel/vision.yaml
@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096
mmap: false
f16: false
-name: gpt-4-vision-preview
+name: gpt-4o

roles:
user: "USER:"
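All three AIO vision configs above rename the served model from gpt-4-vision-preview to gpt-4o, tracking OpenAI's current model naming; clients address the model by this name through LocalAI's OpenAI-compatible API. A minimal request sketch against the renamed model, assuming a default AIO deployment on localhost:8080 and an illustrative image URL:

    import requests

    resp = requests.post(
        "http://localhost:8080/v1/chat/completions",
        json={
            "model": "gpt-4o",  # was "gpt-4-vision-preview" before this commit
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {"type": "image_url",
                     "image_url": {"url": "https://example.com/photo.png"}},
                ],
            }],
        },
    )
    print(resp.json()["choices"][0]["message"]["content"])
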
3 changes: 3 additions & 0 deletions backend/backend.proto
@@ -134,6 +134,9 @@ message PredictOptions {
repeated string Images = 42;
bool UseTokenizerTemplate = 43;
repeated Message Messages = 44;
+  repeated string Videos = 45;
+  repeated string Audios = 46;
+  string CorrelationId = 47;
}

// The response message containing the result
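PredictOptions grows three fields, Videos, Audios, and CorrelationId, added at the next free field numbers (45 to 47) so existing clients stay wire-compatible. By analogy with the existing Images field, the new repeated fields presumably carry one encoded media item per entry. A hedged sketch of a Python client populating them, assuming stubs generated from this proto (backend_pb2, backend_pb2_grpc) and an illustrative local address:

    import grpc
    import backend_pb2
    import backend_pb2_grpc

    opts = backend_pb2.PredictOptions()
    opts.Prompt = "Describe the attached media."
    opts.Videos.append("<encoded video payload>")  # repeated string, field 45
    opts.Audios.append("<encoded audio payload>")  # repeated string, field 46
    # Echoed in the backend logs (see the grpc-server.cpp hunk below),
    # which lets one request be traced across services.
    opts.CorrelationId = "req-1234"

    channel = grpc.insecure_channel("127.0.0.1:50051")
    reply = backend_pb2_grpc.BackendStub(channel).Predict(opts)
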
14 changes: 14 additions & 0 deletions backend/cpp/llama/grpc-server.cpp
@@ -2106,6 +2106,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings();

+    // Add the correlationid to json data
+    data["correlation_id"] = predict->correlationid();

// for each image in the request, add the image data
//
for (int i = 0; i < predict->images_size(); i++) {
@@ -2344,6 +2347,11 @@ class BackendServiceImpl final : public backend::Backend::Service {
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply.set_prompt_tokens(tokens_evaluated);

+        // Log Request Correlation Id
+        LOG_VERBOSE("correlation:", {
+            { "id", data["correlation_id"] }
+        });

// Send the reply
writer->Write(reply);

@@ -2367,6 +2375,12 @@ class BackendServiceImpl final : public backend::Backend::Service {
std::string completion_text;
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {

+        // Log Request Correlation Id
+        LOG_VERBOSE("correlation:", {
+            { "id", data["correlation_id"] }
+        });

completion_text = result.result_json.value("content", "");
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
2 changes: 1 addition & 1 deletion backend/python/autogptq/requirements-intel.txt
@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
2 changes: 1 addition & 1 deletion backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
transformers
2 changes: 1 addition & 1 deletion backend/python/bark/requirements-intel.txt
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
2 changes: 1 addition & 1 deletion backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
bark==0.1.5
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
certifi
2 changes: 1 addition & 1 deletion backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.66.1
+grpcio==1.66.2
protobuf
2 changes: 1 addition & 1 deletion backend/python/coqui/requirements-intel.txt
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
