         export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
         PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda

-  test-voxtral-cuda-e2e:
-    name: test-voxtral-cuda-e2e
+  export-voxtral-cuda-artifact:
+    name: export-voxtral-cuda-artifact
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
@@ -104,6 +104,7 @@ jobs:
       gpu-arch-version: 12.6
       use-custom-docker-registry: false
       submodules: recursive
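+      # Stage the exported files as a workflow artifact so the benchmark and e2e jobs below can download them.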
+      upload-artifact: voxtral-cuda-export
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
         set -eux
@@ -118,6 +119,7 @@ jobs:
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
         pip install mistral-common librosa
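+        # Record the resolved package set in the job log to make dependency issues easier to debug.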
+        pip list
         echo "::endgroup::"

         echo "::group::Export Voxtral"
@@ -129,43 +131,152 @@ jobs:
             --device cuda \
             --max_seq_len 1024 \
             --output_dir ./
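+        # Export the mel-spectrogram audio preprocessor as its own .pte (128 mel features, stacked output, max_audio_len 300).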
+        python -m executorch.extension.audio.mel_spectrogram \
+            --feature_size 128 \
+            --stack_output \
+            --max_audio_len 300 \
+            --output_file voxtral_preprocessor.pte
+
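+        # Fail fast if any expected export output is missing.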
+        test -f model.pte
+        test -f aoti_cuda_blob.ptd
+        test -f voxtral_preprocessor.pte
         echo "::endgroup::"

-        echo "::group::Build Voxtral Runner"
+        echo "::group::Store Voxtral Artifacts"
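+        # Files placed in RUNNER_ARTIFACT_DIR are uploaded as the "voxtral-cuda-export" artifact declared above.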
+        mkdir -p "${RUNNER_ARTIFACT_DIR}"
+        cp model.pte "${RUNNER_ARTIFACT_DIR}/"
+        cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
+        cp voxtral_preprocessor.pte "${RUNNER_ARTIFACT_DIR}/"
+        ls -al "${RUNNER_ARTIFACT_DIR}"
+        echo "::endgroup::"
+
+  benchmark-voxtral-cuda:
+    name: benchmark-voxtral-cuda
+    needs: export-voxtral-cuda-artifact
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      download-artifact: voxtral-cuda-export
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch Requirements"
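+        # Install ExecuTorch requirements with the CUDA backend enabled.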
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        pip list
+        echo "::endgroup::"
+
+        echo "::group::Prepare Voxtral Artifacts"
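+        # The "voxtral-cuda-export" artifact from the export job is made available under RUNNER_ARTIFACT_DIR.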
+        cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
+        cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
+        ls -al model.pte aoti_cuda_blob.ptd
+        echo "::endgroup::"
+
+        echo "::group::Build Voxtral Benchmark"
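+        # The separate .ptd weight blob is loaded through the named-data-map extension, so it is enabled below.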
         cmake -DCMAKE_BUILD_TYPE=Release \
               -DEXECUTORCH_BUILD_CUDA=ON \
               -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
               -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+              -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
               -DEXECUTORCH_BUILD_TESTS=ON \
               -Bcmake-out .
         cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
         echo "::endgroup::"

+        echo "::group::Run Voxtral Benchmark"
+
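+        # Benchmark the exported program together with its CUDA weight blob; the runner reports run latency.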
+        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+        cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd
+
+        echo "::endgroup::"
+
+  test-voxtral-cuda-e2e:
+    name: test-voxtral-cuda-e2e
+    needs: export-voxtral-cuda-artifact
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      timeout: 90
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: 12.6
+      use-custom-docker-registry: false
+      submodules: recursive
+      download-artifact: voxtral-cuda-export
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Setup ExecuTorch Requirements"
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        pip list
+        echo "::endgroup::"
+
+        echo "::group::Prepare Voxtral Artifacts"
+        cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
+        cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
+        cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" .
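+        # Fetch the Tekken tokenizer that ships with the Voxtral-Mini-3B-2507 checkpoint.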
+        TOKENIZER_URL="https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json"
+        curl -L $TOKENIZER_URL -o tekken.json
+        ls -al model.pte aoti_cuda_blob.ptd voxtral_preprocessor.pte tekken.json
+        echo "::endgroup::"
+
+        echo "::group::Download Test Audio File"
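+        # 20-second, 16 kHz speech sample; the generated response is expected to mention "poem" (checked below).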
+        AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
+        curl -L $AUDIO_URL -o poem.wav
+        echo "::endgroup::"
+
+        echo "::group::Build Voxtral Runner"
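+        # Two-step build: install the ExecuTorch LLM preset with CUDA enabled, then build the Voxtral example runner against it.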
+        cmake --preset llm \
+              -DEXECUTORCH_BUILD_CUDA=ON \
+              -DCMAKE_INSTALL_PREFIX=cmake-out \
+              -DCMAKE_BUILD_TYPE=Release \
+              -Bcmake-out -S.
+        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release
+
+        cmake -DEXECUTORCH_BUILD_CUDA=ON \
+              -DCMAKE_BUILD_TYPE=Release \
+              -Sexamples/models/voxtral \
+              -Bcmake-out/examples/models/voxtral/
+        cmake --build cmake-out/examples/models/voxtral --target voxtral_runner --config Release
+        echo "::endgroup::"
+
         echo "::group::Run Voxtral Runner"
-        # Capture output and allow exit code 139 if we have the expected printout
         set +e
         export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
-        OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1)
+        OUTPUT=$(cmake-out/examples/models/voxtral/voxtral_runner \
+              --model_path model.pte \
+              --data_path aoti_cuda_blob.ptd \
+              --tokenizer_path tekken.json \
+              --audio_path poem.wav \
+              --processor_path voxtral_preprocessor.pte \
+              --temperature 0 2>&1)
         EXIT_CODE=$?
         set -e

         echo "$OUTPUT"

-        # Check if the output contains "Run latency (ms):"
-        if echo "$OUTPUT" | grep -q "Run latency (ms):"; then
-          echo "Found expected output: 'Run latency (ms):'"
-          if [ $EXIT_CODE -eq 139 ]; then
-            echo "Exit code 139 (segfault) detected, but passing since we have the expected output"
-            exit 0
-          elif [ $EXIT_CODE -ne 0 ]; then
-            echo "Unexpected exit code: $EXIT_CODE"
-            exit $EXIT_CODE
-          else
-            echo "Command succeeded with exit code 0"
-            exit 0
-          fi
-        else
-          echo "Expected output 'Run latency (ms):' not found in output"
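+        # The response must contain "poem" and the runner must exit with code 0.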
+        if ! echo "$OUTPUT" | grep -iq "poem"; then
+          echo "Expected output 'poem' not found in output"
           exit 1
         fi
+
+        if [ $EXIT_CODE -ne 0 ]; then
+          echo "Unexpected exit code: $EXIT_CODE"
+          exit $EXIT_CODE
+        fi
         echo "::endgroup::"