Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions .github/workflows/win-webgpu-x64-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# CI workflow: configures and builds onnxruntime-genai with the
# `windows_x64_webgpu_release` CMake preset on a self-hosted Windows pool,
# then runs the Java, Python and C++ tests against the build output.
name: "Windows WebGPU x64 Build"
on:
  workflow_dispatch:
  push:
    branches:
      - main
      - 'rel-*'
  pull_request:

# Pull requests share one group per ref, so a newer push cancels the run that
# is still in progress; every other event is keyed by commit SHA.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true

env:
  AZCOPY_AUTO_LOGIN_TYPE: MSI
  AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
  # Must match the binaryDir of the windows_x64_webgpu_* configure presets.
  binaryDir: 'build/webgpu/win-x64'
  # Read by test/python/test_onnxruntime_genai.py to opt in to WebGPU models.
  TEST_WEBGPU: 'true'

jobs:
  windows-webgpu-x64-build:
    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Win2022-GPU-A10"]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v5
        with:
          submodules: true

      - uses: actions/setup-python@v6
        with:
          python-version: '3.12.x'
          architecture: 'x64'

      - name: Setup VCPKG
        uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8
        with:
          vcpkg-version: '2025.03.19'
          vcpkg-hash: '17e96169cd3f266c4716fcdc1bb728e6a64f103941ece463a2834d50694eba4fb48f30135503fd466402afa139abc847ef630733c442595d1c34979f261b0114'
          cmake-version: '3.31.6'
          cmake-hash: '0f1584e8666cf4a65ec514bd02afe281caabf1d45d2c963f3151c41484f457386aa03273ab25776a670be02725354ce0b46f3a5121857416da37366342a833a0'
          add-cmake-to-path: 'true'
          disable-terrapin: 'false'

      - uses: actions/setup-dotnet@v5
        with:
          dotnet-version: '8.0.x'

      # Resolves the first version listed by the nightly feed, exports it as
      # ORT_NIGHTLY_VERSION for later steps, and installs that NuGet package.
      - name: Download OnnxRuntime Nightly (CPU package for headers and lib)
        shell: pwsh
        run: |
          $resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
          $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
          Write-Host "OnnxRuntime version: $ORT_NIGHTLY_VERSION"
          "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
          nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -x -NonInteractive

      # Diagnostic listing only; a failure here must not fail the job.
      - run: Get-ChildItem ${{ env.ORT_PACKAGE_NAME }} -Recurse
        continue-on-error: true

      - name: Extract OnnxRuntime library and header files
        run: |
          mkdir ort/lib
          move ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
          move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-x64/native/* ort/lib/

      - name: Install Rust Toolchain
        run: |
          $exePath = "$env:TEMP\rustup-init.exe"
          (New-Object Net.WebClient).DownloadFile('https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe', $exePath)
          & $exePath -y --default-toolchain=1.86.0
          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"

      - name: Configure CMake
        run: |
          cmake --preset windows_x64_webgpu_release -DTEST_PHI2=True

      # The runner only propagates the exit code of the last native command in
      # a PowerShell script, so each earlier pip call is checked explicitly.
      - name: Install Python dependencies to get WebGPU OnnxRuntime
        run: |
          python -m pip install -r test\python\requirements.txt
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -m pip install -r test\python\webgpu\torch\requirements.txt
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -m pip install -r test\python\webgpu\ort\requirements.txt

      - name: Replace onnxruntime CPU DLLs with WebGPU DLLs
        shell: pwsh
        run: |
          Write-Host "Replacing onnxruntime CPU DLLs with WebGPU DLLs before building..."

          # Find onnxruntime-webgpu package location (installed via pip)
          $ortWebGpuLocation = python -c "import onnxruntime; import os; print(os.path.dirname(onnxruntime.__file__))"
          Write-Host "onnxruntime-webgpu package location: $ortWebGpuLocation"

          # Find the capi directory where WebGPU DLLs are located
          $webgpuCapiDir = Join-Path $ortWebGpuLocation "capi"
          if (-not (Test-Path $webgpuCapiDir)) {
            Write-Error "Could not find onnxruntime capi directory at: $webgpuCapiDir"
            exit 1
          }

          # Copy WebGPU DLLs to ort/lib (for building genai with WebGPU support)
          Write-Host "`nCopying WebGPU DLLs to ort/lib..."
          $dllsToCopy = @("onnxruntime.dll", "dxil.dll", "dxcompiler.dll")
          foreach ($dll in $dllsToCopy) {
            $sourcePath = Join-Path $webgpuCapiDir $dll
            if (Test-Path $sourcePath) {
              Copy-Item -Path $sourcePath -Destination "$env:GITHUB_WORKSPACE\ort\lib\$dll" -Force
              Write-Host " Copied: $dll"
            } elseif ($dll -eq "onnxruntime.dll") {
              # Without this DLL the CPU build stays in ort/lib and the success
              # message below would be false, so stop here instead of warning.
              Write-Error "Required $dll not found at $sourcePath"
              exit 1
            } else {
              Write-Host " Warning: $dll not found at $sourcePath"
            }
          }

          Write-Host "`nWebGPU DLLs successfully replaced in ort/lib/"
          Get-ChildItem "$env:GITHUB_WORKSPACE\ort\lib\*.dll" | ForEach-Object {
            Write-Host " - $($_.Name) ($([math]::Round($_.Length / 1MB, 2)) MB)"
          }

      # Stop before packaging if the main build failed; otherwise only the
      # exit code of the second cmake invocation would decide the step result.
      - name: Build with CMake
        run: |
          cmake --build --preset windows_x64_webgpu_release --parallel
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          cmake --build --preset windows_x64_webgpu_release --target PyPackageBuild

      - name: Install the Python Wheel
        run: |
          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps

      - name: Build the Java API and Run the Java Tests
        run: |
          python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel

      - name: Run the Python Tests
        run: |
          python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e

      # Diagnostic listing that runs even after earlier failures and never
      # fails the job itself.
      - name: Verify Build Artifacts
        if: always()
        continue-on-error: true
        run: |
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse

      # The ORT DLLs are copied next to unit_tests.exe before it is launched.
      - name: Run C++ Unit Tests
        run: |-
          copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\Release
          & .\$env:binaryDir\Release\unit_tests.exe
7 changes: 7 additions & 0 deletions .pipelines/stages/jobs/steps/python-validation-step.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ steps:
python -m pip install -r test/python/directml/torch/requirements.txt
python -m pip install -r test/python/directml/ort/requirements.txt
}
elseif ("$(ep)" -eq "webgpu") {
python -m pip install -r test/python/webgpu/torch/requirements.txt
python -m pip install -r test/python/webgpu/ort/requirements.txt
}
elseif ("$(arch)" -eq "arm64") {
python -m pip install onnxruntime-qnn
}
Expand All @@ -47,6 +51,9 @@ steps:

if ("$(ep)" -eq "directml") {
python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e dml --non_interactive
} elseif ("$(ep)" -eq "webgpu") {
$env:TEST_WEBGPU = "true"
python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive
} else {
python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive
}
Expand Down
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ if(ENABLE_TESTS)
else()
add_compile_definitions(TEST_PHI2=0)
endif()

if (USE_WEBGPU)
add_compile_definitions(USE_WEBGPU=1)
else()
add_compile_definitions(USE_WEBGPU=0)
endif()

endif()

if(ENABLE_TRACING)
Expand Down
20 changes: 20 additions & 0 deletions cmake/presets/CMakeWinBuildPresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,26 @@
"configuration": "MinSizeRel",
"configurePreset": "windows_x64_directml_minsizerel"
},
{
"name": "windows_x64_webgpu_release",
"configuration": "Release",
"configurePreset": "windows_x64_webgpu_release"
},
{
"name": "windows_x64_webgpu_debug",
"configuration": "Debug",
"configurePreset": "windows_x64_webgpu_debug"
},
{
"name": "windows_x64_webgpu_relwithdebinfo",
"configuration": "RelWithDebInfo",
"configurePreset": "windows_x64_webgpu_relwithdebinfo"
},
{
"name": "windows_x64_webgpu_minsizerel",
"configuration": "MinSizeRel",
"configurePreset": "windows_x64_webgpu_minsizerel"
},
{
"name": "windows_arm64_cpu_relwithdebinfo",
"configuration": "RelWithDebInfo",
Expand Down
43 changes: 43 additions & 0 deletions cmake/presets/CMakeWinConfigPresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@
"USE_DML": "ON"
}
},
{
"name": "windows_webgpu_default",
"inherits": "windows_cpu_default",
"cacheVariables": {
"USE_WEBGPU": "ON"
}
},
{
"name": "windows_release_default",
"cacheVariables": {
Expand Down Expand Up @@ -385,6 +392,42 @@
"displayName": "windows x64 directml minsizerel",
"binaryDir": "${sourceDir}/build/directml/win-x64"
},
{
"name": "windows_x64_webgpu_release",
"inherits": [
"windows_release_default",
"windows_webgpu_default"
],
"displayName": "windows x64 webgpu release",
"binaryDir": "${sourceDir}/build/webgpu/win-x64"
},
{
"name": "windows_x64_webgpu_debug",
"inherits": [
"windows_debug_default",
"windows_webgpu_default"
],
"displayName": "windows x64 webgpu debug",
"binaryDir": "${sourceDir}/build/webgpu/win-x64"
},
{
"name": "windows_x64_webgpu_relwithdebinfo",
"inherits": [
"windows_relwithdebinfo_default",
"windows_webgpu_default"
],
"displayName": "windows x64 webgpu relwithdebinfo",
"binaryDir": "${sourceDir}/build/webgpu/win-x64"
},
{
"name": "windows_x64_webgpu_minsizerel",
"inherits": [
"windows_minsizerel_default",
"windows_webgpu_default"
],
"displayName": "windows x64 webgpu minsizerel",
"binaryDir": "${sourceDir}/build/webgpu/win-x64"
},
{
"name": "windows_arm64_cpu_relwithdebinfo",
"inherits": [
Expand Down
27 changes: 24 additions & 3 deletions test/c_api_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
#define PHI2_PATH MODEL_PATH "phi-2/int4/cuda"
#elif USE_DML
#define PHI2_PATH MODEL_PATH "phi-2/int4/dml"
#elif USE_WEBGPU
#define PHI2_PATH MODEL_PATH "phi-2/int4/webgpu"
#else
#define PHI2_PATH MODEL_PATH "phi-2/int4/cpu"
#endif
#endif

#ifndef ENABLE_ENGINE_TESTS
#define ENABLE_ENGINE_TESTS TEST_PHI2 && !USE_DML
#define ENABLE_ENGINE_TESTS TEST_PHI2 && !USE_DML && !USE_WEBGPU
#endif

TEST(CAPITests, Config) {
Expand Down Expand Up @@ -316,8 +318,9 @@ TEST(CAPIEngineTests, MaxLength) {
#endif

// DML doesn't support batch_size > 1
// TODO: WebGPU should support batch_size > 1, investigate why it's failing
TEST(CAPITests, EndToEndPhiBatch) {
#if TEST_PHI2 && !USE_DML
#if TEST_PHI2 && !USE_DML && !USE_WEBGPU
auto model = OgaModel::Create(PHI2_PATH);
auto tokenizer = OgaTokenizer::Create(*model);

Expand Down Expand Up @@ -925,7 +928,7 @@ TEST(CAPITests, SetTerminate) {
#endif
}

// DML Doesn't support batch_size > 1
// DML doesn't support batch_size > 1
#if TEST_PHI2 && !USE_DML

struct Phi2Test {
Expand Down Expand Up @@ -1017,6 +1020,12 @@ class ParametrizedTopKCAPITestsTests : public ::testing::TestWithParam<bool> {
};

TEST_P(ParametrizedTopKCAPITestsTests, TopKCAPI) {
#if USE_WEBGPU
if (GetParam()) {
GTEST_SKIP() << "Skipping Engine test for WebGPU";
}
#endif

Phi2Test test;

test.params_->SetSearchOptionBool("do_sample", true);
Expand All @@ -1038,6 +1047,12 @@ class ParametrizedTopPCAPITestsTests : public ::testing::TestWithParam<bool> {
};

TEST_P(ParametrizedTopPCAPITestsTests, TopPCAPI) {
#if USE_WEBGPU
if (GetParam()) {
GTEST_SKIP() << "Skipping Engine test for WebGPU";
}
#endif

Phi2Test test;

test.params_->SetSearchOptionBool("do_sample", true);
Expand All @@ -1059,6 +1074,12 @@ class ParametrizedTopKTopPCAPITestsTests : public ::testing::TestWithParam<bool>
};

TEST_P(ParametrizedTopKTopPCAPITestsTests, TopKCAPITest) {
#if USE_WEBGPU
if (GetParam()) {
GTEST_SKIP() << "Skipping Engine test for WebGPU";
}
#endif

Phi2Test test;

test.params_->SetSearchOptionBool("do_sample", true);
Expand Down
2 changes: 2 additions & 0 deletions test/model_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ extern std::string g_custom_model_path;
#define PHI2_PATH MODEL_PATH "phi-2/int4/cuda"
#elif USE_DML
#define PHI2_PATH MODEL_PATH "phi-2/int4/dml"
#elif USE_WEBGPU
#define PHI2_PATH MODEL_PATH "phi-2/int4/webgpu"
#else
#define PHI2_PATH MODEL_PATH "phi-2/int4/cpu"
#endif
Expand Down
4 changes: 3 additions & 1 deletion test/python/test_onnxruntime_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
import pathlib
import sys
import sysconfig

import onnxruntime_genai as og
from _test_utils import download_models, run_subprocess
Expand Down Expand Up @@ -81,6 +80,9 @@ def main():
output_paths += download_models(os.path.abspath(args.test_models), "int4", "cuda", log)
if og.is_dml_available():
output_paths += download_models(os.path.abspath(args.test_models), "int4", "dml", log)
# Only build WebGPU models if TEST_WEBGPU environment variable is set
if og.is_webgpu_available() and os.environ.get("TEST_WEBGPU", "").lower() in ["true", "1", "yes"]:
output_paths += download_models(os.path.abspath(args.test_models), "int4", "webgpu", log)

# Run ONNX Runtime GenAI tests
run_onnxruntime_genai_api_tests(os.path.abspath(args.cwd), log, os.path.abspath(args.test_models))
Expand Down
4 changes: 4 additions & 0 deletions test/python/test_onnxruntime_genai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
if og.is_openvino_available():
devices.append("openvino")

if og.is_webgpu_available():
devices.append("webgpu")


def test_config(test_data_path):
model_path = os.fspath(Path(test_data_path) / "hf-internal-testing" / "tiny-random-gpt2-fp32")
Expand Down Expand Up @@ -172,6 +175,7 @@ def test_greedy_search(test_data_path, relative_model_path):
assert np.array_equal(expected_sequence[i], generator.get_sequence(i))
assert int(generator.token_count()) == len(generator.get_sequence(0))


@pytest.mark.parametrize(
"relative_model_path",
(
Expand Down
1 change: 1 addition & 0 deletions test/python/webgpu/ort/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
onnxruntime-webgpu==1.24.1.dev20260203002
2 changes: 2 additions & 0 deletions test/python/webgpu/torch/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--index-url https://download.pytorch.org/whl/cpu
torch==2.7.1+cpu
Loading