diff --git a/.github/workflows/win-webgpu-x64-build.yml b/.github/workflows/win-webgpu-x64-build.yml new file mode 100644 index 0000000000..2f946206ee --- /dev/null +++ b/.github/workflows/win-webgpu-x64-build.yml @@ -0,0 +1,107 @@ +name: "Windows WebGPU x64 Build" +on: + workflow_dispatch: + push: + branches: + - main + - rel-* + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }} + cancel-in-progress: true + +env: + AZCOPY_AUTO_LOGIN_TYPE: MSI + AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4 + ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" + ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" + binaryDir: 'build/cpu/win-x64' + TEST_WEBGPU: 'true' + +jobs: + windows-webgpu-x64-build: + runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Win2022-GPU-A10"] + steps: + - name: Checkout OnnxRuntime GenAI repo + uses: actions/checkout@v5 + with: + submodules: true + + - uses: actions/setup-python@v6 + with: + python-version: '3.12.x' + architecture: 'x64' + + - name: Setup VCPKG + uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8 + with: + vcpkg-version: '2025.03.19' + vcpkg-hash: '17e96169cd3f266c4716fcdc1bb728e6a64f103941ece463a2834d50694eba4fb48f30135503fd466402afa139abc847ef630733c442595d1c34979f261b0114' + cmake-version: '3.31.6' + cmake-hash: '0f1584e8666cf4a65ec514bd02afe281caabf1d45d2c963f3151c41484f457386aa03273ab25776a670be02725354ce0b46f3a5121857416da37366342a833a0' + add-cmake-to-path: 'true' + disable-terrapin: 'false' + + - uses: actions/setup-dotnet@v5 + with: + dotnet-version: '8.0.x' + + - name: Download OnnxRuntime Foundry Package (includes WebGPU support) + shell: pwsh + run: | + # Use Foundry package which includes WebGPU DLLs (dxil.dll, dxcompiler.dll) + $FOUNDRY_VERSION = "1.25.0-dev-20260210-0905-b214734cba" + Write-Host "Downloading OnnxRuntime.Foundry version: $FOUNDRY_VERSION" + nuget install Microsoft.ML.OnnxRuntime.Foundry -version $FOUNDRY_VERSION -Source https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json -x -NonInteractive -ExcludeVersion -DependencyVersion Ignore + + - run: Get-ChildItem Microsoft.ML.OnnxRuntime.Foundry -Recurse + continue-on-error: true + + - name: Extract OnnxRuntime library and header files + run: | + mkdir ort/lib + move Microsoft.ML.OnnxRuntime.Foundry/build/native/include ort/ + move Microsoft.ML.OnnxRuntime.Foundry/runtimes/win-x64/native/* ort/lib/ + + - name: Install Rust Toolchain + run: | + $exePath = "$env:TEMP\rustup-init.exe" + (New-Object Net.WebClient).DownloadFile('https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe', $exePath) + & $exePath -y --default-toolchain=1.86.0 + Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin" + + - name: Configure CMake + run: | + cmake --preset windows_x64_cpu_release -DTEST_PHI2=True + + - name: Build with CMake + run: | + cmake --build --preset windows_x64_cpu_release --parallel + cmake --build --preset windows_x64_cpu_release --target PyPackageBuild + + - name: Install Python dependencies and Wheel + run: | + python -m pip install -r test\python\requirements.txt + python -m pip install -r test\python\webgpu\torch\requirements.txt + python -m pip install -r test\python\webgpu\ort\requirements.txt + python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps + + - name: Build the Java API and Run the Java Tests + run: | + python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + + - name: Run the Python Tests + run: | + python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e + + - name: Verify Build Artifacts + if: always() + continue-on-error: true + run: | + Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse + + - name: Run C++ Unit Tests + run: |- + copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\Release + & .\$env:binaryDir\Release\unit_tests.exe diff --git a/.pipelines/stages/jobs/steps/python-validation-step.yml b/.pipelines/stages/jobs/steps/python-validation-step.yml index 1d9cad50e2..1b735b0870 100644 --- a/.pipelines/stages/jobs/steps/python-validation-step.yml +++ b/.pipelines/stages/jobs/steps/python-validation-step.yml @@ -35,6 +35,10 @@ steps: python -m pip install -r test/python/directml/torch/requirements.txt python -m pip install -r test/python/directml/ort/requirements.txt } + elseif ("$(ep)" -eq "webgpu") { + python -m pip install -r test/python/webgpu/torch/requirements.txt + python -m pip install -r test/python/webgpu/ort/requirements.txt + } elseif ("$(arch)" -eq "arm64") { python -m pip install onnxruntime-qnn } @@ -47,6 +51,9 @@ steps: if ("$(ep)" -eq "directml") { python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e dml --non_interactive + } elseif ("$(ep)" -eq "webgpu") { + $env:TEST_WEBGPU = "true" + python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive } else { python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive } diff --git a/CMakeLists.txt b/CMakeLists.txt index 80eb02b113..502cc06ebc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,6 +133,13 @@ if(ENABLE_TESTS) else() add_compile_definitions(TEST_PHI2=0) endif() + + if (USE_WEBGPU) + add_compile_definitions(USE_WEBGPU=1) + else() + add_compile_definitions(USE_WEBGPU=0) + endif() + endif() if(ENABLE_TRACING) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index cb7b020a0b..b0026435f9 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -17,21 +18,10 @@ #include -#ifndef MODEL_PATH -#define MODEL_PATH "../../test/test_models/" -#endif -#ifndef PHI2_PATH -#if USE_CUDA -#define PHI2_PATH MODEL_PATH "phi-2/int4/cuda" -#elif USE_DML -#define PHI2_PATH MODEL_PATH "phi-2/int4/dml" -#else -#define PHI2_PATH MODEL_PATH "phi-2/int4/cpu" -#endif -#endif +#include "test_utils.h" -#ifndef ENABLE_ENGINE_TESTS -#define ENABLE_ENGINE_TESTS TEST_PHI2 && !USE_DML +#ifndef PHI2_PATH +#define PHI2_PATH test_utils::GetPhi2Path().c_str() #endif TEST(CAPITests, Config) { @@ -316,8 +306,12 @@ TEST(CAPIEngineTests, MaxLength) { #endif // DML doesn't support batch_size > 1 +// TODO: WebGPU should support batch_size > 1, investigate why it's failing TEST(CAPITests, EndToEndPhiBatch) { -#if TEST_PHI2 && !USE_DML +#if TEST_PHI2 + if (!test_utils::IsEngineTestsEnabled()) { + GTEST_SKIP() << "Skipping batch test for DML/WebGPU"; + } auto model = OgaModel::Create(PHI2_PATH); auto tokenizer = OgaTokenizer::Create(*model); @@ -630,7 +624,7 @@ TEST(CAPIEngineTests, EndToEndPhi) { TEST(CAPITests, LoadModelFromMemory) { #if TEST_PHI2 - const char* model_path = PHI2_PATH "/model.onnx"; + std::string model_path = std::string(PHI2_PATH) + "/model.onnx"; std::ifstream model_file(model_path, std::ios::binary | std::ios::ate); ASSERT_TRUE(model_file.is_open()) << "Failed to open model file: " << model_path; std::streamsize size = model_file.tellg(); @@ -925,7 +919,7 @@ TEST(CAPITests, SetTerminate) { #endif } -// DML Doesn't support batch_size > 1 +// DML doesn't support batch_size > 1 #if TEST_PHI2 && !USE_DML struct Phi2Test { @@ -1017,6 +1011,10 @@ class ParametrizedTopKCAPITestsTests : public ::testing::TestWithParam { }; TEST_P(ParametrizedTopKCAPITestsTests, TopKCAPI) { + if (GetParam() && !test_utils::IsEngineTestsEnabled()) { + GTEST_SKIP() << "Skipping Engine test for DML/WebGPU"; + } + Phi2Test test; test.params_->SetSearchOptionBool("do_sample", true); @@ -1038,6 +1036,10 @@ class ParametrizedTopPCAPITestsTests : public ::testing::TestWithParam { }; TEST_P(ParametrizedTopPCAPITestsTests, TopPCAPI) { + if (GetParam() && !test_utils::IsEngineTestsEnabled()) { + GTEST_SKIP() << "Skipping Engine test for DML/WebGPU"; + } + Phi2Test test; test.params_->SetSearchOptionBool("do_sample", true); @@ -1059,6 +1061,10 @@ class ParametrizedTopKTopPCAPITestsTests : public ::testing::TestWithParam }; TEST_P(ParametrizedTopKTopPCAPITestsTests, TopKCAPITest) { + if (GetParam() && !test_utils::IsEngineTestsEnabled()) { + GTEST_SKIP() << "Skipping Engine test for DML/WebGPU"; + } + Phi2Test test; test.params_->SetSearchOptionBool("do_sample", true); diff --git a/test/model_tests.cpp b/test/model_tests.cpp index 25bb1b8b96..fa28b868c3 100644 --- a/test/model_tests.cpp +++ b/test/model_tests.cpp @@ -13,20 +13,13 @@ #include #include +#include "test_utils.h" + // External global variable from main.cpp for custom model path extern std::string g_custom_model_path; -#ifndef MODEL_PATH -#define MODEL_PATH "../../test/test_models/" -#endif #ifndef PHI2_PATH -#if USE_CUDA -#define PHI2_PATH MODEL_PATH "phi-2/int4/cuda" -#elif USE_DML -#define PHI2_PATH MODEL_PATH "phi-2/int4/dml" -#else -#define PHI2_PATH MODEL_PATH "phi-2/int4/cpu" -#endif +#define PHI2_PATH test_utils::GetPhi2Path().c_str() #endif // To generate this file: diff --git a/test/python/test_onnxruntime_genai.py b/test/python/test_onnxruntime_genai.py index b46b815a99..9021f2a41b 100644 --- a/test/python/test_onnxruntime_genai.py +++ b/test/python/test_onnxruntime_genai.py @@ -6,7 +6,6 @@ import os import pathlib import sys -import sysconfig import onnxruntime_genai as og from _test_utils import download_models, run_subprocess @@ -67,6 +66,13 @@ def parse_arguments(): help="Whether to run e2e tests. If not specified e2e tests will not run.", action="store_true", ) + parser.add_argument( + "--eps", + nargs="+", + choices=["cpu", "cuda", "dml", "webgpu"], + default=[], + help="List of execution providers to build models for. If not specified, auto-detects available EPs.", + ) return parser.parse_args() @@ -75,12 +81,27 @@ def main(): log.info("Running onnxruntime-genai tests pipeline") - # Get INT4 ONNX models - output_paths = download_models(os.path.abspath(args.test_models), "int4", "cpu", log) - if og.is_cuda_available(): - output_paths += download_models(os.path.abspath(args.test_models), "int4", "cuda", log) - if og.is_dml_available(): - output_paths += download_models(os.path.abspath(args.test_models), "int4", "dml", log) + # Determine which EPs to build models for + if args.eps: + # User explicitly specified EPs + eps_to_build = args.eps + log.info(f"Building models for explicitly specified EPs: {eps_to_build}") + else: + # Auto-detect available EPs + eps_to_build = ["cpu"] # CPU is always available + if og.is_cuda_available(): + eps_to_build.append("cuda") + if og.is_dml_available(): + eps_to_build.append("dml") + # Only build WebGPU models if TEST_WEBGPU environment variable is set + if og.is_webgpu_available() and os.environ.get("TEST_WEBGPU", "").lower() in ["true", "1", "yes"]: + eps_to_build.append("webgpu") + log.info(f"Auto-detected available EPs: {eps_to_build}") + + # Get INT4 ONNX models for specified/detected EPs + output_paths = [] + for ep in eps_to_build: + output_paths += download_models(os.path.abspath(args.test_models), "int4", ep, log) # Run ONNX Runtime GenAI tests run_onnxruntime_genai_api_tests(os.path.abspath(args.cwd), log, os.path.abspath(args.test_models)) diff --git a/test/python/test_onnxruntime_genai_api.py b/test/python/test_onnxruntime_genai_api.py index 2c85707bf5..602570325a 100644 --- a/test/python/test_onnxruntime_genai_api.py +++ b/test/python/test_onnxruntime_genai_api.py @@ -34,6 +34,9 @@ if og.is_openvino_available(): devices.append("openvino") +if og.is_webgpu_available(): + devices.append("webgpu") + def test_config(test_data_path): model_path = os.fspath(Path(test_data_path) / "hf-internal-testing" / "tiny-random-gpt2-fp32") @@ -172,6 +175,7 @@ def test_greedy_search(test_data_path, relative_model_path): assert np.array_equal(expected_sequence[i], generator.get_sequence(i)) assert int(generator.token_count()) == len(generator.get_sequence(0)) + @pytest.mark.parametrize( "relative_model_path", ( diff --git a/test/python/webgpu/ort/requirements.txt b/test/python/webgpu/ort/requirements.txt new file mode 100644 index 0000000000..aef5432a0f --- /dev/null +++ b/test/python/webgpu/ort/requirements.txt @@ -0,0 +1 @@ +onnxruntime-webgpu==1.25.0.dev20260210001 diff --git a/test/python/webgpu/torch/requirements.txt b/test/python/webgpu/torch/requirements.txt new file mode 100644 index 0000000000..effe87cc0d --- /dev/null +++ b/test/python/webgpu/torch/requirements.txt @@ -0,0 +1,2 @@ +--index-url https://download.pytorch.org/whl/cpu +torch==2.7.1+cpu diff --git a/test/test_utils.h b/test/test_utils.h new file mode 100644 index 0000000000..f686246f32 --- /dev/null +++ b/test/test_utils.h @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include +#include + +#ifndef MODEL_PATH +#define MODEL_PATH "../../test/test_models/" +#endif + +namespace test_utils { + +// Helper function to get the appropriate PHI2 model path based on available models +inline const std::string& GetPhi2Path() { + static std::string phi2_path; + if (!phi2_path.empty()) { + return phi2_path; + } + + std::vector candidate_paths = { + MODEL_PATH "phi-2/int4/cuda", + MODEL_PATH "phi-2/int4/dml", + MODEL_PATH "phi-2/int4/webgpu", + MODEL_PATH "phi-2/int4/cpu"}; + + for (const auto& path : candidate_paths) { + std::filesystem::path model_path(path); + if (std::filesystem::exists(model_path / "genai_config.json")) { + phi2_path = path; + return phi2_path; + } + } + + // Fallback to CPU path + phi2_path = MODEL_PATH "phi-2/int4/cpu"; + return phi2_path; +} + +// Helper to detect if we're using WebGPU or DML EP based on the model path +inline bool IsEngineTestsEnabled() { +#if TEST_PHI2 + std::string path = GetPhi2Path(); + // Skip engine tests for DML and WebGPU (batching not fully tested) + return path.find("/dml") == std::string::npos && + path.find("/webgpu") == std::string::npos; +#else + return false; +#endif +} + +} // namespace test_utils