Commit
Merge remote-tracking branch 'ggerganov/master'
* ggerganov/master:
  whisper : Replace WHISPER_PRINT_DEBUG with WHISPER_LOG_DEBUG (ggerganov#1681)
  sync : ggml (ggml_scale, ggml_row_size, etc.) (ggerganov#1677)
  docker : Dockerize whisper.cpp (ggerganov#1674)
  CI : Add coverage for talk-llama when WHISPER_CUBLAS=1 (ggerganov#1672)
  examples : Revert CMakeLists.txt for talk-llama (ggerganov#1669)
  cmake : set default CUDA architectures (ggerganov#1667)
bygreencn committed Dec 25, 2023
2 parents b63ec23 + 37a709f commit d8e9321
Showing 26 changed files with 3,716 additions and 1,646 deletions.
34 changes: 34 additions & 0 deletions .devops/main-cuda.Dockerfile
@@ -0,0 +1,34 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.3.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build
WORKDIR /app

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV WHISPER_CUBLAS=1

RUN apt-get update && \
    apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY .. .
RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENTRYPOINT [ "bash", "-c" ]
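
The `CUDA_DOCKER_ARCH` argument defaults to a fat `all` build, so a local build can be narrowed to one GPU generation. A minimal sketch, assuming the build runs from the repository root and that the Makefile passes this value straight to `nvcc -arch` (the image tag is illustrative):

```shell
# Build the CUDA image for a single GPU generation (e.g. Ampere)
# instead of the fat default
docker build -f .devops/main-cuda.Dockerfile \
  --build-arg CUDA_DOCKER_ARCH=sm_86 \
  -t whisper.cpp:main-cuda .
```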
19 changes: 19 additions & 0 deletions .devops/main.Dockerfile
@@ -0,0 +1,19 @@
FROM ubuntu:22.04 AS build
WORKDIR /app

RUN apt-get update && \
    apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY .. .
RUN make

FROM ubuntu:22.04 AS runtime
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENTRYPOINT [ "bash", "-c" ]
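
For reference, a local build and smoke test of this CPU image might look like the following (a sketch; the tag is illustrative and the build is assumed to run from the repository root):

```shell
# Build the image, then verify the main binary is present and runnable
docker build -f .devops/main.Dockerfile -t whisper.cpp:main .
docker run -it --rm whisper.cpp:main "./main --help"
```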
10 changes: 5 additions & 5 deletions .github/workflows/build.yml
@@ -117,7 +117,6 @@ jobs:
           -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
           set -e
           apt update
-          apt install -y clang
           apt install -y clang build-essential cmake libsdl2-dev
           cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
           make
@@ -167,7 +166,7 @@ jobs:
             s2arc: x64
             jnaPath: win32-x86-64
           - sdl2: ON
-            s2ver: 2.26.0
+            s2ver: 2.28.5

steps:
- name: Clone
@@ -228,7 +227,7 @@ jobs:
             obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
             s2arc: x64
           - sdl2: ON
-            s2ver: 2.26.0
+            s2ver: 2.28.5

steps:
- name: Clone
@@ -295,7 +294,7 @@ jobs:
           - arch: x64
             s2arc: x64
           - sdl2: ON
-            s2ver: 2.26.0
+            s2ver: 2.28.5

steps:
- name: Clone
@@ -321,7 +320,8 @@ jobs:
         run: >
           cmake -S . -B ./build -A ${{ matrix.arch }}
           -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DWHISPER_CUBLAS=1
+          -DWHISPER_CUBLAS=${{ matrix.cublas }}
+          -DWHISPER_SDL2=${{ matrix.sdl2 }}
- name: Build ${{ matrix.cuda-toolkit }}
run: |
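To reproduce the new matrix-driven configure step outside CI, the matrix values can be expanded by hand; a sketch for the CUBLAS + SDL2 combination (generator and paths assumed; a Windows host with the CUDA toolkit and SDL2 installed):

```shell
# Roughly what the windows-cublas job now runs, with matrix values inlined
cmake -S . -B ./build -A x64 \
  -DCMAKE_BUILD_TYPE=Release \
  -DWHISPER_CUBLAS=1 \
  -DWHISPER_SDL2=1
cmake --build ./build --config Release
```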
57 changes: 57 additions & 0 deletions .github/workflows/docker.yml
@@ -0,0 +1,57 @@
name: Publish Docker image

on:
pull_request:
push:
branches:
- master

jobs:
push_to_registry:
name: Push Docker image to the GitHub Container Registry
if: github.event.pull_request.draft == false

runs-on: ubuntu-latest
env:
COMMIT_SHA: ${{ github.sha }}
strategy:
matrix:
config:
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }

steps:
- name: Check out the repo
uses: actions/checkout@v3

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Log in to the GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push Docker image (versioned)
if: github.event_name == 'push'
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: ${{ matrix.config.platform }}
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
file: ${{ matrix.config.dockerfile }}

- name: Build and push Docker image (tagged)
uses: docker/build-push-action@v5
with:
context: .
push: ${{ github.event_name == 'push' }}
platforms: ${{ matrix.config.platform }}
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
file: ${{ matrix.config.dockerfile }}
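
Once this workflow has run on `master`, the published images should be pullable by the tags defined in the matrix (a sketch; tag names are taken from the matrix above):

```shell
# Pull the two images this workflow publishes
docker pull ghcr.io/ggerganov/whisper.cpp:main
docker pull ghcr.io/ggerganov/whisper.cpp:main-cuda
```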
8 changes: 7 additions & 1 deletion CMakeLists.txt
@@ -526,7 +526,13 @@ endif()

if (GGML_SOURCES_CUDA)
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
+    # Only configure the ggml CUDA architectures if not globally set
+    if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
+        # Not overridden by the user, so set the defaults
+        set(GGML_CUDA_ARCHITECTURES 52 61 70)
+    endif()
+    message(STATUS "GGML: configuring CUDA architectures ${GGML_CUDA_ARCHITECTURES}")
+    set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES ${GGML_CUDA_ARCHITECTURES})
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
endif()

31 changes: 31 additions & 0 deletions README.md
@@ -33,6 +33,7 @@ Supported platforms:
- [x] [WebAssembly](examples/whisper.wasm)
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
+- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)

The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
The rest of the code is part of the [ggml](https://github.com/ggerganov/ggml) machine learning library.
@@ -448,6 +449,36 @@ make clean
WHISPER_OPENBLAS=1 make -j
```

## Docker

### Prerequisites

* Docker must be installed and running on your system.
* Create a folder to store big models & intermediate files (e.g. /whisper/models)

### Images

We have two Docker images available for this project:

1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main`, but compiled with CUDA support. (platforms: `linux/amd64`)

### Usage

```shell
# download a model and persist it in a local folder
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggerganov/whisper.cpp:main "./models/download-ggml-model.sh base /models"

# transcribe an audio file
docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  ghcr.io/ggerganov/whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"

# transcribe an audio file from the samples folder
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggerganov/whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
```
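
The CUDA image is run the same way but needs GPU access; a sketch, assuming the NVIDIA Container Toolkit is installed on the host:

```shell
docker run -it --rm --gpus all \
  -v path/to/models:/models \
  ghcr.io/ggerganov/whisper.cpp:main-cuda "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
```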
## Limitations
- Inference only
26 changes: 5 additions & 21 deletions examples/talk-llama/CMakeLists.txt
@@ -1,30 +1,14 @@
 if (WHISPER_SDL2)
     # talk-llama
     set(TARGET talk-llama)
-    #add_executable(${TARGET} talk-llama.cpp llama.cpp)
-    #target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
-    #target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-
-    # TODO: this is temporary
-    # need to export ggml symbols for MSVC, but too lazy ..
-    add_executable(${TARGET}
-        talk-llama.cpp
-        llama.cpp
-        ../common.cpp
-        ../common-sdl.cpp
-        ../../ggml.c
-        ../../ggml-alloc.c
-        ../../ggml-backend.c
-        ../../ggml-quants.c
-        ../../whisper.cpp)
+    add_executable(${TARGET} talk-llama.cpp llama.cpp)
+    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+    target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})

     if(WIN32)
-    # It requires Windows 8.1 or later for PrefetchVirtualMemory
-    target_compile_definitions(${TARGET} PRIVATE -D_WIN32_WINNT=0x0602)
+        # It requires Windows 8.1 or later for PrefetchVirtualMemory
+        target_compile_definitions(${TARGET} PRIVATE -D_WIN32_WINNT=0x0602)
     endif()

-    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)
-    target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})

     include(DefaultTargetOptions)
 endif ()
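
After this revert, the example links against the normal `common`/`common-sdl` libraries again rather than compiling the ggml sources directly; a sketch of a local build (flags assumed; SDL2 development headers required):

```shell
# Configure with SDL2 enabled so the talk-llama target is generated,
# then build just that target
cmake -S . -B build -DWHISPER_SDL2=ON
cmake --build build --target talk-llama
```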