Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions .github/workflows/win-webgpu-x64-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# CI workflow: builds onnxruntime-genai on a Windows x64 GPU runner against the
# OnnxRuntime Foundry nightly package, then runs the Java, Python and C++ tests
# with TEST_WEBGPU enabled.
name: "Windows WebGPU x64 Build"
on:
  workflow_dispatch:
  push:
    branches:
      - main
      - rel-*
  pull_request:

# Pull-request runs are grouped per ref, so a newer push cancels the run that
# is still in progress; every other event is keyed by commit SHA.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true

env:
  # NOTE(review): the AZCOPY_* and ORT_* variables are not referenced by any
  # step in this file - confirm whether they are still needed.
  AZCOPY_AUTO_LOGIN_TYPE: MSI
  AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
  # Build output directory; later steps read it to locate the wheel, to run
  # the Java build and to find unit_tests.exe.
  binaryDir: 'build/cpu/win-x64'
  # Kept as a quoted string: the Python test driver compares the value against
  # "true" / "1" / "yes".
  TEST_WEBGPU: 'true'

jobs:
  windows-webgpu-x64-build:
    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Win2022-GPU-A10"]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v5
        with:
          submodules: true

      - uses: actions/setup-python@v6
        with:
          python-version: '3.12.x'
          architecture: 'x64'

      - name: Setup VCPKG
        uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8
        with:
          vcpkg-version: '2025.03.19'
          vcpkg-hash: '17e96169cd3f266c4716fcdc1bb728e6a64f103941ece463a2834d50694eba4fb48f30135503fd466402afa139abc847ef630733c442595d1c34979f261b0114'
          cmake-version: '3.31.6'
          cmake-hash: '0f1584e8666cf4a65ec514bd02afe281caabf1d45d2c963f3151c41484f457386aa03273ab25776a670be02725354ce0b46f3a5121857416da37366342a833a0'
          add-cmake-to-path: 'true'
          disable-terrapin: 'false'

      - uses: actions/setup-dotnet@v5
        with:
          dotnet-version: '8.0.x'

      - name: Download OnnxRuntime Foundry Package (includes WebGPU support)
        shell: pwsh
        run: |
          # Use Foundry package which includes WebGPU DLLs (dxil.dll, dxcompiler.dll)
          $FOUNDRY_VERSION = "1.25.0-dev-20260210-0905-b214734cba"
          Write-Host "Downloading OnnxRuntime.Foundry version: $FOUNDRY_VERSION"
          nuget install Microsoft.ML.OnnxRuntime.Foundry -version $FOUNDRY_VERSION -Source https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json -x -NonInteractive -ExcludeVersion -DependencyVersion Ignore

      # Diagnostic listing of the unpacked package; never fails the job.
      - run: Get-ChildItem Microsoft.ML.OnnxRuntime.Foundry -Recurse
        continue-on-error: true

      - name: Extract OnnxRuntime library and header files
        run: |
          mkdir ort/lib
          move Microsoft.ML.OnnxRuntime.Foundry/build/native/include ort/
          move Microsoft.ML.OnnxRuntime.Foundry/runtimes/win-x64/native/* ort/lib/

      - name: Install Rust Toolchain
        run: |
          $exePath = "$env:TEMP\rustup-init.exe"
          (New-Object Net.WebClient).DownloadFile('https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe', $exePath)
          & $exePath -y --default-toolchain=1.86.0
          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"

      - name: Configure CMake
        run: |
          cmake --preset windows_x64_cpu_release -DTEST_PHI2=True

      # The PowerShell wrapper GitHub puts around `run:` scripts only inspects
      # $LASTEXITCODE once, after the whole script. Each intermediate native
      # command is therefore checked explicitly so that an early failure is
      # not hidden by a later command that succeeds.
      - name: Build with CMake
        run: |
          cmake --build --preset windows_x64_cpu_release --parallel
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          cmake --build --preset windows_x64_cpu_release --target PyPackageBuild

      - name: Install Python dependencies and Wheel
        run: |
          python -m pip install -r test\python\requirements.txt
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -m pip install -r test\python\webgpu\torch\requirements.txt
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -m pip install -r test\python\webgpu\ort\requirements.txt
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps

      - name: Build the Java API and Run the Java Tests
        run: |
          python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel

      - name: Run the Python Tests
        run: |
          python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e

      # Diagnostic listing of the build tree; runs even after a failure and
      # never fails the job itself.
      - name: Verify Build Artifacts
        if: always()
        continue-on-error: true
        run: |
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse

      # The OnnxRuntime binaries are copied next to unit_tests.exe before it
      # is launched.
      - name: Run C++ Unit Tests
        run: |-
          copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\Release
          & .\$env:binaryDir\Release\unit_tests.exe
7 changes: 7 additions & 0 deletions .pipelines/stages/jobs/steps/python-validation-step.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ steps:
python -m pip install -r test/python/directml/torch/requirements.txt
python -m pip install -r test/python/directml/ort/requirements.txt
}
elseif ("$(ep)" -eq "webgpu") {
python -m pip install -r test/python/webgpu/torch/requirements.txt
python -m pip install -r test/python/webgpu/ort/requirements.txt
}
elseif ("$(arch)" -eq "arm64") {
python -m pip install onnxruntime-qnn
}
Expand All @@ -47,6 +51,9 @@ steps:

if ("$(ep)" -eq "directml") {
python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e dml --non_interactive
} elseif ("$(ep)" -eq "webgpu") {
$env:TEST_WEBGPU = "true"
python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive
} else {
python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive
}
Expand Down
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ if(ENABLE_TESTS)
else()
add_compile_definitions(TEST_PHI2=0)
endif()

if (USE_WEBGPU)
add_compile_definitions(USE_WEBGPU=1)
else()
add_compile_definitions(USE_WEBGPU=0)
endif()

endif()

if(ENABLE_TRACING)
Expand Down
40 changes: 23 additions & 17 deletions test/c_api_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <fstream>
#include <numeric>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
#include <regex>
Expand All @@ -17,21 +18,10 @@

#include <gtest/gtest.h>

#ifndef MODEL_PATH
#define MODEL_PATH "../../test/test_models/"
#endif
#ifndef PHI2_PATH
#if USE_CUDA
#define PHI2_PATH MODEL_PATH "phi-2/int4/cuda"
#elif USE_DML
#define PHI2_PATH MODEL_PATH "phi-2/int4/dml"
#else
#define PHI2_PATH MODEL_PATH "phi-2/int4/cpu"
#endif
#endif
#include "test_utils.h"

#ifndef ENABLE_ENGINE_TESTS
#define ENABLE_ENGINE_TESTS TEST_PHI2 && !USE_DML
#ifndef PHI2_PATH
#define PHI2_PATH test_utils::GetPhi2Path().c_str()
#endif

TEST(CAPITests, Config) {
Expand Down Expand Up @@ -316,8 +306,12 @@ TEST(CAPIEngineTests, MaxLength) {
#endif

// DML doesn't support batch_size > 1
// TODO: WebGPU should support batch_size > 1, investigate why it's failing
TEST(CAPITests, EndToEndPhiBatch) {
#if TEST_PHI2 && !USE_DML
#if TEST_PHI2
if (!test_utils::IsEngineTestsEnabled()) {
GTEST_SKIP() << "Skipping batch test for DML/WebGPU";
}
auto model = OgaModel::Create(PHI2_PATH);
auto tokenizer = OgaTokenizer::Create(*model);

Expand Down Expand Up @@ -630,7 +624,7 @@ TEST(CAPIEngineTests, EndToEndPhi) {
TEST(CAPITests, LoadModelFromMemory) {
#if TEST_PHI2

const char* model_path = PHI2_PATH "/model.onnx";
std::string model_path = std::string(PHI2_PATH) + "/model.onnx";
std::ifstream model_file(model_path, std::ios::binary | std::ios::ate);
ASSERT_TRUE(model_file.is_open()) << "Failed to open model file: " << model_path;
std::streamsize size = model_file.tellg();
Expand Down Expand Up @@ -925,7 +919,7 @@ TEST(CAPITests, SetTerminate) {
#endif
}

// DML Doesn't support batch_size > 1
// DML doesn't support batch_size > 1
#if TEST_PHI2 && !USE_DML

struct Phi2Test {
Expand Down Expand Up @@ -1017,6 +1011,10 @@ class ParametrizedTopKCAPITestsTests : public ::testing::TestWithParam<bool> {
};

TEST_P(ParametrizedTopKCAPITestsTests, TopKCAPI) {
if (GetParam() && !test_utils::IsEngineTestsEnabled()) {
GTEST_SKIP() << "Skipping Engine test for DML/WebGPU";
}

Phi2Test test;

test.params_->SetSearchOptionBool("do_sample", true);
Expand All @@ -1038,6 +1036,10 @@ class ParametrizedTopPCAPITestsTests : public ::testing::TestWithParam<bool> {
};

TEST_P(ParametrizedTopPCAPITestsTests, TopPCAPI) {
if (GetParam() && !test_utils::IsEngineTestsEnabled()) {
GTEST_SKIP() << "Skipping Engine test for DML/WebGPU";
}

Phi2Test test;

test.params_->SetSearchOptionBool("do_sample", true);
Expand All @@ -1059,6 +1061,10 @@ class ParametrizedTopKTopPCAPITestsTests : public ::testing::TestWithParam<bool>
};

TEST_P(ParametrizedTopKTopPCAPITestsTests, TopKCAPITest) {
if (GetParam() && !test_utils::IsEngineTestsEnabled()) {
GTEST_SKIP() << "Skipping Engine test for DML/WebGPU";
}

Phi2Test test;

test.params_->SetSearchOptionBool("do_sample", true);
Expand Down
13 changes: 3 additions & 10 deletions test/model_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,13 @@
#include <ort_genai.h>
#include <gtest/gtest.h>

#include "test_utils.h"

// External global variable from main.cpp for custom model path
extern std::string g_custom_model_path;

#ifndef MODEL_PATH
#define MODEL_PATH "../../test/test_models/"
#endif
#ifndef PHI2_PATH
#if USE_CUDA
#define PHI2_PATH MODEL_PATH "phi-2/int4/cuda"
#elif USE_DML
#define PHI2_PATH MODEL_PATH "phi-2/int4/dml"
#else
#define PHI2_PATH MODEL_PATH "phi-2/int4/cpu"
#endif
#define PHI2_PATH test_utils::GetPhi2Path().c_str()
#endif

// To generate this file:
Expand Down
35 changes: 28 additions & 7 deletions test/python/test_onnxruntime_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
import pathlib
import sys
import sysconfig

import onnxruntime_genai as og
from _test_utils import download_models, run_subprocess
Expand Down Expand Up @@ -67,6 +66,13 @@ def parse_arguments():
help="Whether to run e2e tests. If not specified e2e tests will not run.",
action="store_true",
)
parser.add_argument(
"--eps",
Comment thread
baijumeswani marked this conversation as resolved.
nargs="+",
choices=["cpu", "cuda", "dml", "webgpu"],
default=[],
help="List of execution providers to build models for. If not specified, auto-detects available EPs.",
)
return parser.parse_args()


Expand All @@ -75,12 +81,27 @@ def main():

log.info("Running onnxruntime-genai tests pipeline")

# Get INT4 ONNX models
output_paths = download_models(os.path.abspath(args.test_models), "int4", "cpu", log)
if og.is_cuda_available():
output_paths += download_models(os.path.abspath(args.test_models), "int4", "cuda", log)
if og.is_dml_available():
output_paths += download_models(os.path.abspath(args.test_models), "int4", "dml", log)
# Determine which EPs to build models for
if args.eps:
# User explicitly specified EPs
eps_to_build = args.eps
log.info(f"Building models for explicitly specified EPs: {eps_to_build}")
else:
# Auto-detect available EPs
eps_to_build = ["cpu"] # CPU is always available
if og.is_cuda_available():
eps_to_build.append("cuda")
if og.is_dml_available():
eps_to_build.append("dml")
# Only build WebGPU models if TEST_WEBGPU environment variable is set
if og.is_webgpu_available() and os.environ.get("TEST_WEBGPU", "").lower() in ["true", "1", "yes"]:
eps_to_build.append("webgpu")
log.info(f"Auto-detected available EPs: {eps_to_build}")

# Get INT4 ONNX models for specified/detected EPs
output_paths = []
for ep in eps_to_build:
output_paths += download_models(os.path.abspath(args.test_models), "int4", ep, log)

# Run ONNX Runtime GenAI tests
run_onnxruntime_genai_api_tests(os.path.abspath(args.cwd), log, os.path.abspath(args.test_models))
Expand Down
4 changes: 4 additions & 0 deletions test/python/test_onnxruntime_genai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
if og.is_openvino_available():
devices.append("openvino")

if og.is_webgpu_available():
devices.append("webgpu")


def test_config(test_data_path):
model_path = os.fspath(Path(test_data_path) / "hf-internal-testing" / "tiny-random-gpt2-fp32")
Expand Down Expand Up @@ -172,6 +175,7 @@ def test_greedy_search(test_data_path, relative_model_path):
assert np.array_equal(expected_sequence[i], generator.get_sequence(i))
assert int(generator.token_count()) == len(generator.get_sequence(0))


@pytest.mark.parametrize(
"relative_model_path",
(
Expand Down
1 change: 1 addition & 0 deletions test/python/webgpu/ort/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
onnxruntime-webgpu==1.25.0.dev20260210001
2 changes: 2 additions & 0 deletions test/python/webgpu/torch/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--index-url https://download.pytorch.org/whl/cpu
torch==2.7.1+cpu
54 changes: 54 additions & 0 deletions test/test_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <filesystem>
#include <string>
#include <system_error>
#include <vector>

// Root directory of the test models, relative to the directory the test
// binary is run from. Can be overridden by defining MODEL_PATH beforehand.
#ifndef MODEL_PATH
#define MODEL_PATH "../../test/test_models/"
#endif

namespace test_utils {

// Returns the phi-2 int4 model directory to use for the tests.
//
// The candidates are probed in the order cuda, dml, webgpu, cpu; the first
// directory that contains a genai_config.json wins. When none is found the
// CPU path is returned even though it does not exist, so callers fail with a
// path that is easy to recognise.
//
// The probe runs once. The result lives in a function-local static that is
// initialised in a single step, so concurrent first calls are safe and the
// returned reference (and any c_str() taken from it) stays valid for the
// lifetime of the process.
inline const std::string& GetPhi2Path() {
  static const std::string phi2_path = [] {
    const char* const candidates[] = {
        MODEL_PATH "phi-2/int4/cuda",
        MODEL_PATH "phi-2/int4/dml",
        MODEL_PATH "phi-2/int4/webgpu",
        MODEL_PATH "phi-2/int4/cpu"};

    for (const char* candidate : candidates) {
      // Non-throwing overload: a candidate that cannot be inspected (for
      // example because of a permission error) is treated as missing.
      std::error_code ec;
      const std::filesystem::path config_file =
          std::filesystem::path(candidate) / "genai_config.json";
      if (std::filesystem::exists(config_file, ec)) {
        return std::string(candidate);
      }
    }

    // Fallback to CPU path
    return std::string(MODEL_PATH "phi-2/int4/cpu");
  }();
  return phi2_path;
}

// Returns true when the last component of `model_path` names the DML or
// WebGPU execution provider ("dml" or "webgpu").
//
// Only the final component is compared, so a MODEL_PATH that happens to sit
// below a directory such as "/webgpu-ci/" is not misclassified.
inline bool IsDmlOrWebGpuModelPath(const std::string& model_path) {
  const std::string ep_dir = std::filesystem::path(model_path).filename().string();
  return ep_dir == "dml" || ep_dir == "webgpu";
}

// Returns true when the engine tests should run for the model selected by
// GetPhi2Path().
//
// Engine tests are skipped for DML and WebGPU (batching not fully tested) and
// are always disabled when the build does not set TEST_PHI2 to a non-zero
// value.
inline bool IsEngineTestsEnabled() {
#if defined(TEST_PHI2) && TEST_PHI2
  return !IsDmlOrWebGpuModelPath(GetPhi2Path());
#else
  return false;
#endif
}

}  // namespace test_utils
Loading