Skip to content

Commit f640eac

Browse files
authored
Merge pull request #2 from lochjin/master
feat:merge from master in ggml-org#16376
2 parents 29db96f + 5c5a0d2 commit f640eac

File tree

945 files changed

+106174
-41661
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

945 files changed

+106174
-41661
lines changed

.clang-format

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,14 @@ AllowShortIfStatementsOnASingleLine: Never
2222
AllowShortLambdasOnASingleLine: Inline
2323
AllowShortLoopsOnASingleLine: false
2424
AlwaysBreakBeforeMultilineStrings: true
25-
BinPackArguments: false
25+
# Treat CUDA keywords/attributes as "attribute macros" and avoid breaking lines inside them
26+
AttributeMacros:
27+
- __host__
28+
- __device__
29+
- __global__
30+
- __forceinline__
31+
- __launch_bounds__
32+
BinPackArguments: true
2633
BinPackParameters: false # OnePerLine
2734
BitFieldColonSpacing: Both
2835
BreakBeforeBraces: Custom # Attach

.clang-tidy

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Checks: >
1717
clang-analyzer-*,
1818
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
1919
performance-*,
20+
-performance-enum-size,
2021
portability-*,
2122
-portability-simd-intrinsics,
2223
misc-*,

.devops/cloud-v-pipeline

Lines changed: 0 additions & 22 deletions
This file was deleted.

.devops/cpu.Dockerfile

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,15 @@ FROM ubuntu:$UBUNTU_VERSION AS build
44

55
ARG TARGETARCH
66

7-
ARG GGML_CPU_ARM_ARCH=armv8-a
8-
97
RUN apt-get update && \
108
apt-get install -y build-essential git cmake libcurl4-openssl-dev
119

1210
WORKDIR /app
1311

1412
COPY . .
1513

16-
RUN if [ "$TARGETARCH" = "amd64" ]; then \
14+
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
1715
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18-
elif [ "$TARGETARCH" = "arm64" ]; then \
19-
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
2016
else \
2117
echo "Unsupported architecture"; \
2218
exit 1; \

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ RUN apt-get update \
6161
python3 \
6262
python3-pip \
6363
&& pip install --upgrade pip setuptools wheel \
64-
&& pip install -r requirements.txt \
64+
&& pip install --break-system-packages -r requirements.txt \
6565
&& apt autoremove -y \
6666
&& apt clean -y \
6767
&& rm -rf /tmp/* /var/tmp/* \

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG MUSA_VERSION=rc4.2.0
3+
ARG MUSA_VERSION=rc4.3.0
44
# Target the MUSA build image
55
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
66

.devops/rocm.Dockerfile

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
ARG UBUNTU_VERSION=24.04
22

33
# This needs to generally match the container host's environment.
4-
ARG ROCM_VERSION=6.4
5-
ARG AMDGPU_VERSION=6.4
4+
ARG ROCM_VERSION=7.0
5+
ARG AMDGPU_VERSION=7.0
66

7-
# Target the CUDA build image
7+
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
99

1010
### Build image
@@ -13,18 +13,14 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1313
# Unless otherwise specified, we make a fat build.
1414
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
1515
# This is mostly tied to rocBLAS supported archs.
16-
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
17-
# gfx906 is deprecated
18-
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
16+
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102,not officialy supported
17+
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1918

20-
ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
21-
#ARG ROCM_DOCKER_ARCH=gfx1100
19+
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
20+
#ARG ROCM_DOCKER_ARCH='gfx1151'
2221

23-
# Set nvcc architectured
22+
# Set ROCm architectures
2423
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
25-
# Enable ROCm
26-
# ENV CC=/opt/rocm/llvm/bin/clang
27-
# ENV CXX=/opt/rocm/llvm/bin/clang++
2824

2925
RUN apt-get update \
3026
&& apt-get install -y \
@@ -40,7 +36,12 @@ WORKDIR /app
4036
COPY . .
4137

4238
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
43-
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
39+
cmake -S . -B build \
40+
-DGGML_HIP=ON \
41+
-DGGML_HIP_ROCWMMA_FATTN=ON \
42+
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
43+
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
44+
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
4445
&& cmake --build build --config Release -j$(nproc)
4546

4647
RUN mkdir -p /app/lib \

.devops/s390x.Dockerfile

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
ARG GCC_VERSION=15.2.0
2+
ARG UBUNTU_VERSION=24.04
3+
4+
### Build Llama.cpp stage
5+
FROM gcc:${GCC_VERSION} AS build
6+
7+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
8+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
9+
apt update -y && \
10+
apt upgrade -y && \
11+
apt install -y --no-install-recommends \
12+
git cmake ccache ninja-build \
13+
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
14+
libopenblas-dev libcurl4-openssl-dev && \
15+
rm -rf /var/lib/apt/lists/*
16+
17+
WORKDIR /app
18+
COPY . .
19+
20+
RUN --mount=type=cache,target=/root/.ccache \
21+
--mount=type=cache,target=/app/build \
22+
cmake -S . -B build -G Ninja \
23+
-DCMAKE_BUILD_TYPE=Release \
24+
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
25+
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
26+
-DLLAMA_BUILD_TESTS=OFF \
27+
-DGGML_BACKEND_DL=OFF \
28+
-DGGML_NATIVE=OFF \
29+
-DGGML_BLAS=ON \
30+
-DGGML_BLAS_VENDOR=OpenBLAS && \
31+
cmake --build build --config Release -j $(nproc) && \
32+
cmake --install build --prefix /opt/llama.cpp
33+
34+
COPY *.py /opt/llama.cpp/bin
35+
COPY .devops/tools.sh /opt/llama.cpp/bin
36+
37+
COPY gguf-py /opt/llama.cpp/gguf-py
38+
COPY requirements.txt /opt/llama.cpp/gguf-py
39+
COPY requirements /opt/llama.cpp/gguf-py/requirements
40+
41+
42+
### Collect all llama.cpp binaries, libraries and distro libraries
43+
FROM scratch AS collector
44+
45+
# Copy llama.cpp binaries and libraries
46+
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
47+
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
48+
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
49+
50+
51+
### Base image
52+
FROM ubuntu:${UBUNTU_VERSION} AS base
53+
54+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
55+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
56+
apt update -y && \
57+
apt install -y --no-install-recommends \
58+
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
59+
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
60+
curl libgomp1 libopenblas-dev && \
61+
apt autoremove -y && \
62+
apt clean -y && \
63+
rm -rf /tmp/* /var/tmp/* && \
64+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
65+
find /var/cache -type f -delete
66+
67+
# Copy llama.cpp libraries
68+
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
69+
70+
71+
### Full
72+
FROM base AS full
73+
74+
ENV PATH="/root/.cargo/bin:${PATH}"
75+
WORKDIR /app
76+
77+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
78+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
79+
apt update -y && \
80+
apt install -y \
81+
git cmake libjpeg-dev \
82+
python3 python3-pip python3-dev && \
83+
apt autoremove -y && \
84+
apt clean -y && \
85+
rm -rf /tmp/* /var/tmp/* && \
86+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
87+
find /var/cache -type f -delete
88+
89+
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
90+
91+
COPY --from=collector /llama.cpp/bin /app
92+
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
93+
94+
RUN pip install --no-cache-dir --break-system-packages \
95+
-r /app/gguf-py/requirements.txt
96+
97+
ENTRYPOINT [ "/app/tools.sh" ]
98+
99+
100+
### CLI Only
101+
FROM base AS light
102+
103+
WORKDIR /llama.cpp/bin
104+
105+
# Copy llama.cpp binaries and libraries
106+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
107+
108+
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
109+
110+
111+
### Server
112+
FROM base AS server
113+
114+
ENV LLAMA_ARG_HOST=0.0.0.0
115+
116+
WORKDIR /llama.cpp/bin
117+
118+
# Copy llama.cpp binaries and libraries
119+
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
120+
121+
EXPOSE 8080
122+
123+
ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]

.devops/vulkan.Dockerfile

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,30 @@ ARG UBUNTU_VERSION=24.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

5-
# Install build tools
6-
RUN apt update && apt install -y git build-essential cmake wget
5+
# Ref: https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html
76

8-
# Install Vulkan SDK and cURL
9-
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10-
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
11-
apt update -y && \
12-
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
7+
# Install build tools
8+
RUN apt update && apt install -y git build-essential cmake wget xz-utils
9+
10+
# Install Vulkan SDK
11+
ARG VULKAN_VERSION=1.4.321.1
12+
RUN ARCH=$(uname -m) && \
13+
wget -qO /tmp/vulkan-sdk.tar.xz https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz && \
14+
mkdir -p /opt/vulkan && \
15+
tar -xf /tmp/vulkan-sdk.tar.xz -C /tmp --strip-components=1 && \
16+
mv /tmp/${ARCH}/* /opt/vulkan/ && \
17+
rm -rf /tmp/*
18+
19+
# Install cURL and Vulkan SDK dependencies
20+
RUN apt install -y libcurl4-openssl-dev curl \
21+
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev
22+
23+
# Set environment variables
24+
ENV VULKAN_SDK=/opt/vulkan
25+
ENV PATH=$VULKAN_SDK/bin:$PATH
26+
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
27+
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK:$CMAKE_PREFIX_PATH
28+
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig:$PKG_CONFIG_PATH
1329

1430
# Build it
1531
WORKDIR /app

.editorconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,11 @@ insert_final_newline = unset
5252
[vendor/miniaudio/miniaudio.h]
5353
trim_trailing_whitespace = unset
5454
insert_final_newline = unset
55+
56+
[tools/server/webui/**]
57+
indent_style = unset
58+
indent_size = unset
59+
end_of_line = unset
60+
charset = unset
61+
trim_trailing_whitespace = unset
62+
insert_final_newline = unset

0 commit comments

Comments
 (0)