Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 91 additions & 12 deletions .github/workflows/linux-cpu-x64-nightly-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,38 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
ort_dir: "onnxruntime-linux-x64-1.18.0"
ort_zip: "onnxruntime-linux-x64-1.18.0.tgz"
ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-1.18.0.tgz"
ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json"
DOTNET_INSTALL_DIR: "${{ github.workspace }}/dotnet"
jobs:
job:
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2204-AMD-CPU"]
steps:
- name: Checkout OnnxRuntime GenAI repo
uses: actions/checkout@v5

- uses: actions/setup-dotnet@v5
with:
dotnet-version: '8.0.x'

- name: Setup Java 17
uses: actions/setup-java@v5
with:
java-version: '17'
distribution: 'temurin'
cache: 'gradle'

- name: Setup Gradle
uses: gradle/actions/setup-gradle@v3
with:
gradle-version: '8.6'

- uses: actions/setup-python@v6
with:
python-version: '3.11.x'
architecture: 'x64'

- uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8
with:
vcpkg-version: '2025.03.19'
Expand All @@ -30,6 +52,7 @@ jobs:
cmake-hash: '42395e20b10a8e9ef3e33014f9a4eed08d46ab952e02d2c1bbc8f6133eca0d7719fb75680f9bbff6552f20fcd1b73d86860f7f39388d631f98fb6f622b37cf04'
add-cmake-to-path: 'true'
disable-terrapin: 'true'

- name: Install Rust with rustup
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
Expand All @@ -40,32 +63,93 @@ jobs:
rustup component add rust-src
rustup show active-toolchain

- name: Get the Latest OnnxRuntime Nightly Version
shell: pwsh
run: |
$resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
$ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
Write-Host "$ORT_NIGHTLY_VERSION"
"ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append

- name: Download OnnxRuntime Nightly
run: |
dotnet new console
dotnet add package ${{ env.ORT_PACKAGE_NAME }} --version ${{ env.ORT_NIGHTLY_VERSION }} --source ${{ env.ORT_NIGHTLY_SOURCE }} --package-directory .
continue-on-error: true

- name: list files
shell: bash
run: |
ls -l
ls -R ${{ env.ORT_PACKAGE_NAME }}
continue-on-error: true

# TODO: Find out why we need to have libonnxruntime.so.$ort_version
- name: Extract OnnxRuntime library and header files
run: |
set -e -x
mkdir -p ort/lib
mv microsoft.ml.onnxruntime/${{ env.ORT_NIGHTLY_VERSION }}/build/native/include ort/
mv microsoft.ml.onnxruntime/${{ env.ORT_NIGHTLY_VERSION }}/runtimes/linux-x64/native/* ort/lib/
cp ort/lib/libonnxruntime.so ort/lib/libonnxruntime.so.1

- name: Git Submodule Update
run: |
git submodule update --init --recursive

- name: Build with CMake and clang
- name: Build with CMake and GCC
run: |
set -e -x
rm -rf build
cmake --preset linux_gcc_cpu_release
cmake --build --preset linux_gcc_cpu_release
cmake --build --preset linux_gcc_cpu_release --target PyPackageBuild

- name: Install the python wheel and test dependencies
- name: Install the Python wheel and test dependencies
run: |
python3 -m pip install -r test/python/requirements.txt --user
python3 -m pip install -r test/python/cpu/torch/requirements.txt --user
python3 -m pip install -r test/python/cpu/ort/requirements.txt --user
python3 -m pip install build/cpu/wheel/onnxruntime_genai*.whl --no-deps

- name: Run the python tests
- name: Verify Build Artifacts
if: always()
run: |
ls -l ${{ github.workspace }}/build

- name: Build the Java API and Run the Java Tests
run: |
set -e -x
python3 build.py --config=Release --build_dir build/cpu --build_java --parallel --cmake_generator "Ninja" --use_guidance

- name: Run the Python tests
run: |
export ORTGENAI_LOG_ORT_LIB=1
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e

- name: Build the C# API and Run the C# Tests
run: |
export ORTGENAI_LOG_ORT_LIB=1
cd test/csharp
dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/" /p:OrtLibDir="../../ort/lib/" --verbosity normal

- name: Build the C# Examples
run: |
export ORTGENAI_LOG_ORT_LIB=1
cd examples/csharp/ModelChat
dotnet build -c Release
cd ../ModelMM
dotnet build -c Release

- name: Test the C# LLM Example with Tool Calling
run: |
export ORTGENAI_LOG_ORT_LIB=1
python3 test/python/special_tokens.py -p test/test_models/qwen-2.5-0.5b/int4/cpu/tokenizer.json -s "<tool_call>" -e "</tool_call>"
./examples/csharp/ModelChat/bin/Release/net8.0/ModelChat -m test/test_models/qwen-2.5-0.5b/int4/cpu/ -e cpu --response_format lark_grammar --tools_file test/test_models/tool-definitions/weather.json --tool_call_start "<tool_call>" --tool_call_end "</tool_call>" --user_prompt "What is the weather in Redmond, WA?" --tool_output --non_interactive --verbose

- name: Run Q&A Example
run: |
python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -e cpu -p int4 -o ./example-models/qwen2.5-0.5b-instruct
python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -o ./example-models/qwen2.5-0.5b-instruct -p int4 -e cpu
python3 examples/python/model-qa.py -m ./example-models/qwen2.5-0.5b-instruct -e cpu --user_prompt "what is 10+4?" --non_interactive > output.log 2>&1
if cat output.log | grep -Eq "14|fourteen"; then
echo "Result seems correct"
Expand All @@ -74,11 +158,6 @@ jobs:
exit 1
fi

- name: Verify Build Artifacts
if: always()
run: |
ls -l ${{ github.workspace }}/build

- name: Upload Build Artifacts
uses: actions/upload-artifact@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linux-gpu-x64-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ jobs:
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements.txt --user && \
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/torch/requirements.txt --user && \
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/ort/requirements.txt --user && \
${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}"
${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --use_guidance --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}"

- name: Install the onnxruntime-genai Python wheel and run Python tests
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/win-cuda-x64-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:

- name: Build the Java API and Run the Java Tests
run: |
python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel
python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel --use_guidance

- name: Run the Python Tests
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/win-directml-x64-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ jobs:

- name: Build the Java API and Run the Java Tests
run: |
python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel
python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel --use_guidance

- name: Run the Python Tests
run: |
Expand Down
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.12.0-dev
0.13.0-dev
1 change: 1 addition & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,7 @@ def build_examples(args: argparse.Namespace, env: dict[str, str]):
"-DORT_LIB_DIR=" + str(ort_lib_dir),
"-DOGA_INCLUDE_DIR=" + str(oga_include_dir),
"-DOGA_LIB_DIR=" + str(oga_lib_dir),
"-DUSE_GUIDANCE=" + 'ON' if args.use_guidance else 'OFF',
]
)

Expand Down
2 changes: 1 addition & 1 deletion src/constrained_logits_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ std::unique_ptr<ConstrainedLogitsProcessor> CreateGuidanceLogitsProcessor(const
#if USE_GUIDANCE
return std::make_unique<GuidanceLogitsProcessor>(state);
#endif
Log("warning", "No supported ConstrainedLogitsProcessor found. e.g. to use guidance, build with use_guidance=true");
Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true");
}
return nullptr;
}
Expand Down
Loading