feat: add qwq 32b preview (#443)
Signed-off-by: Sertac Ozercan <[email protected]>
sozercan authored Dec 3, 2024
1 parent 47a7c27 commit 98379ef
Showing 5 changed files with 27 additions and 2 deletions.
1 change: 1 addition & 0 deletions .github/workflows/update-models-self.yaml
@@ -23,6 +23,7 @@ jobs:
- llama-3.1-70b-instruct
- mixtral-8x7b-instruct
- codestral-22b
- qwq-32b-preview
runs-on: self-hosted
timeout-minutes: 360
steps:
4 changes: 4 additions & 0 deletions README.md
@@ -96,6 +96,8 @@ If it doesn't include a specific model, you can always [create your own images](
| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
| QwQ | | 32B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
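
Once a container from the table above is running, the model is served through an OpenAI-compatible API on port 8080. A minimal smoke test for the new model (the prompt is illustrative; give the container a moment to load the weights before the first request):

```bash
# Start the new model on CPU (same command as the table row above)
docker run -d --rm -p 8080:8080 ghcr.io/sozercan/qwq:32b

# Query the OpenAI-compatible chat completions endpoint; the model field
# matches the "model name" column of the table.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwq-32b-preview", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
```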


### NVIDIA CUDA

@@ -114,8 +116,10 @@ If it doesn't include a specific model, you can always [create your own images](
| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
| QwQ | | 32B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
| 📸 Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |
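
Running the CUDA images with `--gpus all` requires the NVIDIA Container Toolkit on the host. A quick sanity check before pulling a 32B image (the CUDA base image tag is only an example):

```bash
# Confirm Docker can pass the GPU through before starting a large model
# (assumes the NVIDIA Container Toolkit is installed; the image tag is illustrative)
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
```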


### Apple Silicon (experimental)

> [!NOTE]
19 changes: 19 additions & 0 deletions models/qwq-32b-preview.yaml
@@ -0,0 +1,19 @@
#syntax=ghcr.io/sozercan/aikit:latest
apiVersion: v1alpha1
debug: true
runtime: cuda
models:
  - name: qwq-32b-preview
    source: https://huggingface.co/lmstudio-community/QwQ-32B-Preview-GGUF/resolve/main/QwQ-32B-Preview-Q4_K_M.gguf
    sha256: 8389413ff15eabdeae824faa78ca433e7cd61a93a6bee4fb0e916cdb727efcda
config: |
  - name: qwq-32b-preview
    backend: llama
    parameters:
      model: QwQ-32B-Preview-Q4_K_M.gguf
    context_size: 8192
    repeat_penalty: 1.05
    flash_attention: true
    f16: true
    mmap: true
    system_prompt: "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."
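
The `#syntax` directive at the top routes the build through the aikit BuildKit frontend, so this model definition doubles as a build recipe. A sketch of building and running a custom image from it, following the aikit quick-start flow (the `my-qwq` tag is an arbitrary example):

```bash
# Build an image from the model definition via the aikit frontend
docker buildx build . -t my-qwq -f models/qwq-32b-preview.yaml --load

# runtime: cuda in the file means the container expects a GPU at run time
docker run -d --rm --gpus all -p 8080:8080 my-qwq
```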
3 changes: 1 addition & 2 deletions scripts/parse-models.sh
@@ -23,11 +23,10 @@ extract_model_type() {
echo "$1" | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p'
}

for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "llama-3.2-1b-instruct" "llama-3.2-3b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev"; do
for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "llama-3.2-1b-instruct" "llama-3.2-3b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev" "qwq-32b-preview"; do
echo "Model: $MODEL"
echo " Name: $(extract_model_name "$MODEL")"
echo " Size: $(extract_model_size "$MODEL")"
echo " Type: $(extract_model_type "$MODEL")"
echo
done
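
For the new entry, `extract_model_type` prints an empty string, since `qwq-32b-preview` matches neither the flux pattern nor the `chat`/`instruct` substitutions. A short illustration of the helper shown in this hunk:

```bash
extract_model_type "qwq-32b-preview"   # -> ""     (no expression matches)
extract_model_type "llama-2-7b-chat"   # -> "chat"
extract_model_type "flux-1-dev"        # -> "dev"
```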

2 changes: 2 additions & 0 deletions website/docs/premade-models.md
@@ -23,6 +23,7 @@ Depending on your CPU capabilities, AIKit will automatically select the most opt
| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
| QwQ | | 32B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |

## NVIDIA CUDA

@@ -36,6 +37,7 @@ Depending on your CPU capabilities, AIKit will automatically select the most opt
| 🅿️ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
| QwQ | | 32B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/qwq:32b` | `qwq-32b-preview` | [Apache 2.0](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE) |
| 📸 Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |

:::note