Skip to content

Commit 6e08aef

Browse files
authored
enable int4 tile ci for gemma3 (#15332)
As the title says: enable the quantized int4 tile-packed CI configuration for gemma3.
1 parent befd2d1 commit 6e08aef

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

.github/workflows/cuda.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,12 @@ jobs:
176176
matrix:
177177
quant:
178178
- name: "non-quantized"
179-
artifact: "voxtral-cuda-export"
179+
artifact: "gemma3-cuda-export"
180180
extra_args: ""
181-
# TODO: enable gemma3 quantization
182-
# - name: "quantized-int4-tile-packed"
183-
# artifact: "voxtral-cuda-quantized-int4-tile-packed"
184-
# extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
181+
- name: "quantized-int4-tile-packed"
182+
artifact: "gemma3-cuda-quantized-int4-tile-packed"
183+
extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
184+
# TODO: enable int4-weight-only on gemma3.
185185
# - name: "quantized-int4-weight-only"
186186
# artifact: "gemma3-cuda-quantized-int4-weight-only"
187187
# # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
@@ -194,7 +194,7 @@ jobs:
194194
gpu-arch-version: 12.6
195195
use-custom-docker-registry: false
196196
submodules: recursive
197-
upload-artifact: gemma3-cuda-export
197+
upload-artifact: ${{ matrix.quant.artifact }}
198198
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
199199
script: |
200200
set -eux
@@ -435,9 +435,9 @@ jobs:
435435
format:
436436
- name: "non-quantized"
437437
artifact: "gemma3-cuda-export"
438-
# TODO: enable quantized gemma3.
439-
# - name: "quantized-int4-tile-packed"
440-
# artifact: "gemma3-cuda-quantized-int4-tile-packed"
438+
- name: "quantized-int4-tile-packed"
439+
artifact: "gemma3-cuda-quantized-int4-tile-packed"
440+
# TODO: enable int4-weight-only on gemma3.
441441
# - name: "quantized-int4-weight-only"
442442
# artifact: "gemma3-cuda-quantized-int4-weight-only"
443443
with:

0 commit comments

Comments
 (0)