@@ -176,12 +176,12 @@ jobs:
176176 matrix :
177177 quant :
178178 - name: "non-quantized"
179- artifact : " voxtral -cuda-export"
179+ artifact: "gemma3-cuda-export"
180180 extra_args : " "
181- # TODO: enable gemma3 quantization
182- # - name : "quantized-int4-tile-packed"
183- # artifact : "voxtral-cuda-quantized-int4-tile-packed "
184- # extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
181+ - name: "quantized-int4-tile-packed"
182+ artifact: "gemma3-cuda-quantized-int4-tile-packed"
183+ extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
184+ # TODO: enable int4-weight-only on gemma3.
185185 # - name: "quantized-int4-weight-only"
186186 # artifact: "gemma3-cuda-quantized-int4-weight-only"
187187 # # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
@@ -194,7 +194,7 @@ jobs:
194194 gpu-arch-version : 12.6
195195 use-custom-docker-registry : false
196196 submodules : recursive
197- upload-artifact : gemma3-cuda-export
197+ upload-artifact : ${{ matrix.quant.artifact }}
198198 ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
199199 script : |
200200 set -eux
@@ -435,9 +435,9 @@ jobs:
435435 format :
436436 - name: "non-quantized"
437437 artifact: "gemma3-cuda-export"
438- # TODO: enable quantized gemma3.
439- # - name : "quantized-int4-tile-packed"
440- # artifact: "gemma3-cuda-quantized- int4-tile-packed"
438+ - name: "quantized-int4-tile-packed"
439+ artifact: "gemma3-cuda-quantized-int4-tile-packed"
440+ # TODO: enable int4-weight-only on gemma3.
441441 # - name: "quantized-int4-weight-only"
442442 # artifact: "gemma3-cuda-quantized-int4-weight-only"
443443 with :
0 commit comments