@@ -165,14 +165,27 @@ jobs:
         echo "::endgroup::"

   export-gemma3-cuda-artifact:
-    name: export-gemma3-cuda-artifact
+    name: export-gemma3-cuda-${{ matrix.quant.name }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
       contents: read
     secrets: inherit
     strategy:
       fail-fast: false
+      matrix:
+        quant:
+          - name: "non-quantized"
+            artifact: "gemma3-cuda-export"
+            extra_args: ""
+          # TODO: enable gemma3 quantization
+          # - name: "quantized-int4-tile-packed"
+          #   artifact: "gemma3-cuda-quantized-int4-tile-packed"
+          #   extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
+          # - name: "quantized-int4-weight-only"
+          #   artifact: "gemma3-cuda-quantized-int4-weight-only"
+          #   # TODO: adding "--qlinear 4w" produces invalid results; needs further investigation.
+          #   extra_args: "--qlinear_encoder 4w"
     with:
       timeout: 90
       secrets-env: EXECUTORCH_HF_TOKEN
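
Each matrix entry's `artifact` value is the name this job publishes its outputs under, and the e2e job added at the bottom of this diff must request exactly the same name via `download-artifact`. A hedged way to spot-check that contract locally, assuming an authenticated `gh` CLI and a known workflow run id (`<run-id>` is a placeholder):

```bash
# Hypothetical spot check: pull the non-quantized entry's artifact and
# confirm it contains the files the e2e job copies out of RUNNER_ARTIFACT_DIR.
gh run download <run-id> --name "gemma3-cuda-export"
ls -al model.pte aoti_cuda_blob.ptd
```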
@@ -198,7 +211,8 @@ jobs:
         pip list
         echo "::endgroup::"

-        echo "::group::Export Gemma3"
+        echo "::group::Export Gemma3 (${{ matrix.quant.name }})"
+        EXTRA_ARGS="${{ matrix.quant.extra_args }}"
         optimum-cli export executorch \
             --model "google/gemma-3-4b-it" \
             --task "multimodal-text-to-text" \
@@ -212,7 +226,7 @@ jobs:
         test -f aoti_cuda_blob.ptd
         echo "::endgroup::"

-        echo "::group::Store Gemma3 Artifacts"
+        echo "::group::Store Gemma3 Artifacts (${{ matrix.quant.name }})"
         mkdir -p "${RUNNER_ARTIFACT_DIR}/"
         cp model.pte "${RUNNER_ARTIFACT_DIR}/"
         cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
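
Read together, these hunks mean each matrix entry drives one export invocation whose quantization flags arrive through `EXTRA_ARGS`. A minimal local sketch of one entry, assuming `${EXTRA_ARGS}` is appended to the command somewhere below the lines visible in this diff (the flags after `--task` are not shown here):

```bash
# Sketch of the non-quantized entry's export step; EXTRA_ARGS is empty for it.
# ${EXTRA_ARGS} is left unquoted on purpose so multiple flags word-split.
EXTRA_ARGS=""  # e.g. "--qlinear_encoder 4w" for the int4-weight-only entry
optimum-cli export executorch \
    --model "google/gemma-3-4b-it" \
    --task "multimodal-text-to-text" \
    ${EXTRA_ARGS}
test -f model.pte && test -f aoti_cuda_blob.ptd  # the files the workflow asserts on
```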
@@ -407,3 +421,87 @@ jobs:
           exit $EXIT_CODE
         fi
         echo "::endgroup::"
+
+  test-gemma3-cuda-e2e:
+    name: test-gemma3-cuda-e2e-${{ matrix.format.name }}
+    needs: export-gemma3-cuda-artifact
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        format:
+          - name: "non-quantized"
+            artifact: "gemma3-cuda-export"
+          # TODO: enable quantized gemma3.
+          # - name: "quantized-int4-tile-packed"
+          #   artifact: "gemma3-cuda-quantized-int4-tile-packed"
+          # - name: "quantized-int4-weight-only"
+          #   artifact: "gemma3-cuda-quantized-int4-weight-only"
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      download-artifact: ${{ matrix.format.artifact }}
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch Requirements"
+        ./install_requirements.sh
+        pip list
+        echo "::endgroup::"
+
+        echo "::group::Prepare Gemma3 Artifacts (${{ matrix.format.name }})"
+        cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
+        cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
+        TOKENIZER_URL="https://huggingface.co/unsloth/gemma-3-1b-it/resolve/main/tokenizer.json"
+        curl -L "$TOKENIZER_URL" -o tokenizer.json
+        ls -al model.pte aoti_cuda_blob.ptd tokenizer.json
+        IMAGE_PATH="docs/source/_static/img/et-logo.png"
+        echo "::endgroup::"
+
+        echo "::group::Build Gemma3 Runner"
+        cmake --preset llm \
+              -DEXECUTORCH_BUILD_CUDA=ON \
+              -DCMAKE_INSTALL_PREFIX=cmake-out \
+              -DCMAKE_BUILD_TYPE=Release \
+              -Bcmake-out -S.
+        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release
+
+        cmake -DEXECUTORCH_BUILD_CUDA=ON \
+              -DCMAKE_BUILD_TYPE=Release \
+              -Sexamples/models/gemma3 \
+              -Bcmake-out/examples/models/gemma3/
+        cmake --build cmake-out/examples/models/gemma3 --target gemma3_e2e_runner --config Release
+        echo "::endgroup::"
+
+        echo "::group::Run Gemma3 Runner (${{ matrix.format.name }})"
+        set +e
+        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+        OUTPUT=$(cmake-out/examples/models/gemma3/gemma3_e2e_runner \
+              --model_path model.pte \
+              --data_path aoti_cuda_blob.ptd \
+              --tokenizer_path tokenizer.json \
+              --image_path "$IMAGE_PATH" \
+              --temperature 0 2>&1)
+        EXIT_CODE=$?
+        set -e
+
+        echo "$OUTPUT"
+
+        if ! echo "$OUTPUT" | grep -iq "chip"; then
+          echo "Expected output 'chip' not found in output"
+          exit 1
+        fi
+
+        if [ $EXIT_CODE -ne 0 ]; then
+          echo "Unexpected exit code: $EXIT_CODE"
+          exit $EXIT_CODE
+        fi
+        echo "::endgroup::"
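
The runner step above uses a capture-then-assert shape worth naming: errexit is suspended only around the binary so a crash still lets the log print, the content check runs first (a wrong answer is caught even when the process exits 0), and only then is the exit code propagated. A minimal skeleton of the same pattern, with a hypothetical `./some_runner` standing in for `gemma3_e2e_runner`:

```bash
#!/usr/bin/env bash
set -eu

set +e                                     # suspend errexit around the command only
OUTPUT=$(./some_runner --flag value 2>&1)  # hypothetical binary
EXIT_CODE=$?
set -e

echo "$OUTPUT"                             # always surface the log

if ! echo "$OUTPUT" | grep -iq "expected"; then
  echo "Expected output 'expected' not found"
  exit 1                                   # content check first
fi

if [ "$EXIT_CODE" -ne 0 ]; then
  echo "Unexpected exit code: $EXIT_CODE"
  exit "$EXIT_CODE"                        # then propagate the process status
fi
```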