diff --git a/.github/workflows/linux-cpu-x64-nightly-build.yml b/.github/workflows/linux-cpu-x64-nightly-build.yml
index 6b4b89c010..d459978af6 100644
--- a/.github/workflows/linux-cpu-x64-nightly-build.yml
+++ b/.github/workflows/linux-cpu-x64-nightly-build.yml
@@ -12,9 +12,10 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
- ort_dir: "onnxruntime-linux-x64-1.18.0"
- ort_zip: "onnxruntime-linux-x64-1.18.0.tgz"
- ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-1.18.0.tgz"
+ ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
+ ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
+ ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json"
+ DOTNET_INSTALL_DIR: "${{ github.workspace }}/dotnet"
jobs:
job:
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2204-AMD-CPU"]
@@ -22,6 +23,27 @@ jobs:
- name: Checkout OnnxRuntime GenAI repo
uses: actions/checkout@v5
+ - uses: actions/setup-dotnet@v5  # provides the dotnet CLI used by the NuGet download and the C# steps of this job
+ with:
+ dotnet-version: '8.0.x'
+
+ - name: Setup Java 17  # JDK presumably required by the --build_java build step; confirm
+ uses: actions/setup-java@v5
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+ cache: 'gradle'  # setup-java's built-in dependency cache, keyed for Gradle
+
+ - name: Setup Gradle
+ uses: gradle/actions/setup-gradle@v3
+ with:
+ gradle-version: '8.6'  # quoted so the version stays a string
+
+ - uses: actions/setup-python@v6  # interpreter for the python3 invocations later in this job
+ with:
+ python-version: '3.11.x'
+ architecture: 'x64'
+
- uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8
with:
vcpkg-version: '2025.03.19'
@@ -30,6 +52,7 @@ jobs:
cmake-hash: '42395e20b10a8e9ef3e33014f9a4eed08d46ab952e02d2c1bbc8f6133eca0d7719fb75680f9bbff6552f20fcd1b73d86860f7f39388d631f98fb6f622b37cf04'
add-cmake-to-path: 'true'
disable-terrapin: 'true'
+
- name: Install Rust with rustup
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
@@ -40,11 +63,41 @@ jobs:
rustup component add rust-src
rustup show active-toolchain
+ - name: Get the Latest OnnxRuntime Nightly Version
+ shell: pwsh  # PowerShell: the feed response is consumed below as an already-parsed object
+ run: |
+ $feed = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
+ $latestVersion = $feed.value[0].versions[0].normalizedVersion  # first listed version of the first package returned
+ Write-Host "$latestVersion"
+ "ORT_NIGHTLY_VERSION=$latestVersion" | Out-File -FilePath $env:GITHUB_ENV -Append  # exported for the following steps
+
+ - name: Download OnnxRuntime Nightly
+ run: |
+ dotnet new console  # scratch project so that `dotnet add package` has a project to add the reference to
+ dotnet add package ${{ env.ORT_PACKAGE_NAME }} --version ${{ env.ORT_NIGHTLY_VERSION }} --source ${{ env.ORT_NIGHTLY_SOURCE }} --package-directory .
+ continue-on-error: true  # NOTE(review): a failed download does not stop the job here; the extract step would then fail on missing files - confirm this is intended
+
+ - name: list files
+ shell: bash
+ run: |
+ ls -l
+ ls -R microsoft.ml.onnxruntime  # NuGet restores into a lower-cased package-id folder; same path the extract step reads
+ continue-on-error: true  # diagnostic listing only; it must never fail the job
+
+# TODO: Find out why we need to have libonnxruntime.so.$ort_version
+ - name: Extract OnnxRuntime library and header files
+ run: |
+ set -e -x  # stop at the first failing command and echo every command
+ mkdir -p ort/lib
+ mv microsoft.ml.onnxruntime/${{ env.ORT_NIGHTLY_VERSION }}/build/native/include ort/  # headers end up in ort/include
+ mv microsoft.ml.onnxruntime/${{ env.ORT_NIGHTLY_VERSION }}/runtimes/linux-x64/native/* ort/lib/  # linux-x64 native binaries end up in ort/lib
+ cp ort/lib/libonnxruntime.so ort/lib/libonnxruntime.so.1  # also expose the library under the .so.1 name (reason is the open TODO above this step)
+
- name: Git Submodule Update
run: |
git submodule update --init --recursive
- - name: Build with CMake and clang
+ - name: Build with CMake and GCC
run: |
set -e -x
rm -rf build
@@ -52,20 +105,51 @@ jobs:
cmake --build --preset linux_gcc_cpu_release
cmake --build --preset linux_gcc_cpu_release --target PyPackageBuild
- - name: Install the python wheel and test dependencies
+ - name: Install the Python wheel and test dependencies
run: |
python3 -m pip install -r test/python/requirements.txt --user
python3 -m pip install -r test/python/cpu/torch/requirements.txt --user
python3 -m pip install -r test/python/cpu/ort/requirements.txt --user
python3 -m pip install build/cpu/wheel/onnxruntime_genai*.whl --no-deps
- - name: Run the python tests
+ - name: Verify Build Artifacts
+ if: always()
run: |
+ ls -l ${{ github.workspace }}/build
+
+ - name: Build the Java API and Run the Java Tests
+ run: |
+ set -e -x  # stop at the first failing command and echo every command
+ python3 build.py --config=Release --build_dir build/cpu --build_java --parallel --cmake_generator "Ninja" --use_guidance  # same build/cpu directory the C# tests read from; --use_guidance presumably enables the USE_GUIDANCE build option - confirm
+
+ - name: Run the Python tests
+ run: |
+ export ORTGENAI_LOG_ORT_LIB=1
python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e
+ - name: Build the C# API and Run the C# Tests
+ run: |
+ export ORTGENAI_LOG_ORT_LIB=1  # presumably makes the library log which ONNX Runtime binary it loads - TODO confirm
+ cd test/csharp
+ dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/" /p:OrtLibDir="../../ort/lib/" --verbosity normal  # ort/lib holds the nightly binaries extracted earlier in this job
+
+ - name: Build the C# Examples
+ run: |
+ export ORTGENAI_LOG_ORT_LIB=1
+ cd examples/csharp/ModelChat
+ dotnet build -c Release
+ cd ../ModelMM  # sibling example project: examples/csharp/ModelMM
+ dotnet build -c Release
+
+ - name: Test the C# LLM Example with Tool Calling  # NOTE(review): the -s/-e and --tool_call_start/--tool_call_end values below are empty strings - confirm the intended tool-call delimiter tokens
+ run: |
+ export ORTGENAI_LOG_ORT_LIB=1
+ python3 test/python/special_tokens.py -p test/test_models/qwen-2.5-0.5b/int4/cpu/tokenizer.json -s "" -e ""
+ ./examples/csharp/ModelChat/bin/Release/net8.0/ModelChat -m test/test_models/qwen-2.5-0.5b/int4/cpu/ -e cpu --response_format lark_grammar --tools_file test/test_models/tool-definitions/weather.json --tool_call_start "" --tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" --tool_output --non_interactive --verbose
+
- name: Run Q&A Example
run: |
- python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -e cpu -p int4 -o ./example-models/qwen2.5-0.5b-instruct
+ python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -o ./example-models/qwen2.5-0.5b-instruct -p int4 -e cpu
python3 examples/python/model-qa.py -m ./example-models/qwen2.5-0.5b-instruct -e cpu --user_prompt "what is 10+4?" --non_interactive > output.log 2>&1
if cat output.log | grep -Eq "14|fourteen"; then
echo "Result seems correct"
@@ -74,11 +158,6 @@ jobs:
exit 1
fi
- - name: Verify Build Artifacts
- if: always()
- run: |
- ls -l ${{ github.workspace }}/build
-
- name: Upload Build Artifacts
uses: actions/upload-artifact@v4
with:
diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml
index e6b0fd8918..6228623f5a 100644
--- a/.github/workflows/linux-gpu-x64-build.yml
+++ b/.github/workflows/linux-gpu-x64-build.yml
@@ -150,7 +150,7 @@ jobs:
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements.txt --user && \
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/torch/requirements.txt --user && \
${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/ort/requirements.txt --user && \
- ${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}"
+ ${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --use_guidance --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}"
- name: Install the onnxruntime-genai Python wheel and run Python tests
run: |
diff --git a/.github/workflows/win-cuda-x64-build.yml b/.github/workflows/win-cuda-x64-build.yml
index 8af0039056..de8fb083ee 100644
--- a/.github/workflows/win-cuda-x64-build.yml
+++ b/.github/workflows/win-cuda-x64-build.yml
@@ -100,7 +100,7 @@ jobs:
- name: Build the Java API and Run the Java Tests
run: |
- python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel
+ python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel --use_guidance
- name: Run the Python Tests
run: |
diff --git a/.github/workflows/win-directml-x64-build.yml b/.github/workflows/win-directml-x64-build.yml
index 7a234db27b..65bf12ecd2 100644
--- a/.github/workflows/win-directml-x64-build.yml
+++ b/.github/workflows/win-directml-x64-build.yml
@@ -116,7 +116,7 @@ jobs:
- name: Build the Java API and Run the Java Tests
run: |
- python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel
+ python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel --use_guidance
- name: Run the Python Tests
run: |
diff --git a/VERSION_INFO b/VERSION_INFO
index eef637823a..eca364485e 100644
--- a/VERSION_INFO
+++ b/VERSION_INFO
@@ -1 +1 @@
-0.12.0-dev
\ No newline at end of file
+0.13.0-dev
\ No newline at end of file
diff --git a/build.py b/build.py
index ff595232ce..7517a01915 100644
--- a/build.py
+++ b/build.py
@@ -803,6 +803,7 @@ def build_examples(args: argparse.Namespace, env: dict[str, str]):
"-DORT_LIB_DIR=" + str(ort_lib_dir),
"-DOGA_INCLUDE_DIR=" + str(oga_include_dir),
"-DOGA_LIB_DIR=" + str(oga_lib_dir),
+ "-DUSE_GUIDANCE=" + 'ON' if args.use_guidance else 'OFF',
]
)
diff --git a/src/constrained_logits_processor.cpp b/src/constrained_logits_processor.cpp
index 73620fbd9d..42a7623fdc 100644
--- a/src/constrained_logits_processor.cpp
+++ b/src/constrained_logits_processor.cpp
@@ -261,7 +261,7 @@ std::unique_ptr CreateGuidanceLogitsProcessor(const
#if USE_GUIDANCE
return std::make_unique(state);
#endif
- Log("warning", "No supported ConstrainedLogitsProcessor found. e.g. to use guidance, build with use_guidance=true");
+ Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true");
}
return nullptr;
}