diff --git a/.github/workflows/linux-cpu-x64-nightly-build.yml b/.github/workflows/linux-cpu-x64-nightly-build.yml index 6b4b89c010..d459978af6 100644 --- a/.github/workflows/linux-cpu-x64-nightly-build.yml +++ b/.github/workflows/linux-cpu-x64-nightly-build.yml @@ -12,9 +12,10 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: - ort_dir: "onnxruntime-linux-x64-1.18.0" - ort_zip: "onnxruntime-linux-x64-1.18.0.tgz" - ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-1.18.0.tgz" + ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" + ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" + ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" + DOTNET_INSTALL_DIR: "${{ github.workspace }}/dotnet" jobs: job: runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2204-AMD-CPU"] @@ -22,6 +23,27 @@ jobs: - name: Checkout OnnxRuntime GenAI repo uses: actions/checkout@v5 + - uses: actions/setup-dotnet@v5 + with: + dotnet-version: '8.0.x' + + - name: Setup Java 17 + uses: actions/setup-java@v5 + with: + java-version: '17' + distribution: 'temurin' + cache: 'gradle' + + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v3 + with: + gradle-version: '8.6' + + - uses: actions/setup-python@v6 + with: + python-version: '3.11.x' + architecture: 'x64' + - uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8 with: vcpkg-version: '2025.03.19' @@ -30,6 +52,7 @@ jobs: cmake-hash: '42395e20b10a8e9ef3e33014f9a4eed08d46ab952e02d2c1bbc8f6133eca0d7719fb75680f9bbff6552f20fcd1b73d86860f7f39388d631f98fb6f622b37cf04' add-cmake-to-path: 'true' disable-terrapin: 'true' + - name: Install Rust with rustup run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s 
-- -y @@ -40,11 +63,41 @@ jobs: rustup component add rust-src rustup show active-toolchain + - name: Get the Latest OnnxRuntime Nightly Version + shell: pwsh + run: | + $resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}" + $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion + Write-Host "$ORT_NIGHTLY_VERSION" + "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append + + - name: Download OnnxRuntime Nightly + run: | + dotnet new console + dotnet add package ${{ env.ORT_PACKAGE_NAME }} --version ${{ env.ORT_NIGHTLY_VERSION }} --source ${{ env.ORT_NIGHTLY_SOURCE }} --package-directory . + continue-on-error: true + + - name: list files + shell: bash + run: | + ls -l + ls -R ${{ env.ORT_PACKAGE_NAME }} + continue-on-error: true + +# TODO: Find out why we need to have libonnxruntime.so.$ort_version + - name: Extract OnnxRuntime library and header files + run: | + set -e -x + mkdir -p ort/lib + mv microsoft.ml.onnxruntime/${{ env.ORT_NIGHTLY_VERSION }}/build/native/include ort/ + mv microsoft.ml.onnxruntime/${{ env.ORT_NIGHTLY_VERSION }}/runtimes/linux-x64/native/* ort/lib/ + cp ort/lib/libonnxruntime.so ort/lib/libonnxruntime.so.1 + - name: Git Submodule Update run: | git submodule update --init --recursive - - name: Build with CMake and clang + - name: Build with CMake and GCC run: | set -e -x rm -rf build @@ -52,20 +105,51 @@ jobs: cmake --build --preset linux_gcc_cpu_release cmake --build --preset linux_gcc_cpu_release --target PyPackageBuild - - name: Install the python wheel and test dependencies + - name: Install the Python wheel and test dependencies run: | python3 -m pip install -r test/python/requirements.txt --user python3 -m pip install -r test/python/cpu/torch/requirements.txt --user python3 -m pip install -r test/python/cpu/ort/requirements.txt --user python3 -m pip install build/cpu/wheel/onnxruntime_genai*.whl --no-deps - - name: Run the python tests + - name: Verify Build Artifacts + if: 
always() run: | + ls -l ${{ github.workspace }}/build + + - name: Build the Java API and Run the Java Tests + run: | + set -e -x + python3 build.py --config=Release --build_dir build/cpu --build_java --parallel --cmake_generator "Ninja" --use_guidance + + - name: Run the Python tests + run: | + export ORTGENAI_LOG_ORT_LIB=1 python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e + - name: Build the C# API and Run the C# Tests + run: | + export ORTGENAI_LOG_ORT_LIB=1 + cd test/csharp + dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/" /p:OrtLibDir="../../ort/lib/" --verbosity normal + + - name: Build the C# Examples + run: | + export ORTGENAI_LOG_ORT_LIB=1 + cd examples/csharp/ModelChat + dotnet build -c Release + cd ../ModelMM + dotnet build -c Release + + - name: Test the C# LLM Example with Tool Calling + run: | + export ORTGENAI_LOG_ORT_LIB=1 + python3 test/python/special_tokens.py -p test/test_models/qwen-2.5-0.5b/int4/cpu/tokenizer.json -s "" -e "" + ./examples/csharp/ModelChat/bin/Release/net8.0/ModelChat -m test/test_models/qwen-2.5-0.5b/int4/cpu/ -e cpu --response_format lark_grammar --tools_file test/test_models/tool-definitions/weather.json --tool_call_start "" --tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" --tool_output --non_interactive --verbose + - name: Run Q&A Example run: | - python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -e cpu -p int4 -o ./example-models/qwen2.5-0.5b-instruct + python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -o ./example-models/qwen2.5-0.5b-instruct -p int4 -e cpu python3 examples/python/model-qa.py -m ./example-models/qwen2.5-0.5b-instruct -e cpu --user_prompt "what is 10+4?" 
--non_interactive > output.log 2>&1 if cat output.log | grep -Eq "14|fourteen"; then echo "Result seems correct" @@ -74,11 +158,6 @@ jobs: exit 1 fi - - name: Verify Build Artifacts - if: always() - run: | - ls -l ${{ github.workspace }}/build - - name: Upload Build Artifacts uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml index e6b0fd8918..6228623f5a 100644 --- a/.github/workflows/linux-gpu-x64-build.yml +++ b/.github/workflows/linux-gpu-x64-build.yml @@ -150,7 +150,7 @@ jobs: ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements.txt --user && \ ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/torch/requirements.txt --user && \ ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/ort/requirements.txt --user && \ - ${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}" + ${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --use_guidance --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}" - name: Install the onnxruntime-genai Python wheel and run Python tests run: | diff --git a/.github/workflows/win-cuda-x64-build.yml b/.github/workflows/win-cuda-x64-build.yml index 8af0039056..de8fb083ee 100644 --- a/.github/workflows/win-cuda-x64-build.yml +++ b/.github/workflows/win-cuda-x64-build.yml @@ -100,7 +100,7 @@ jobs: - name: Build the Java API and Run the Java Tests run: | - python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel --use_guidance - name: Run the Python Tests run: | diff --git a/.github/workflows/win-directml-x64-build.yml b/.github/workflows/win-directml-x64-build.yml index 
7a234db27b..65bf12ecd2 100644 --- a/.github/workflows/win-directml-x64-build.yml +++ b/.github/workflows/win-directml-x64-build.yml @@ -116,7 +116,7 @@ jobs: - name: Build the Java API and Run the Java Tests run: | - python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel --use_guidance - name: Run the Python Tests run: | diff --git a/VERSION_INFO b/VERSION_INFO index eef637823a..eca364485e 100644 --- a/VERSION_INFO +++ b/VERSION_INFO @@ -1 +1 @@ -0.12.0-dev \ No newline at end of file +0.13.0-dev \ No newline at end of file diff --git a/build.py b/build.py index ff595232ce..7517a01915 100644 --- a/build.py +++ b/build.py @@ -803,6 +803,7 @@ def build_examples(args: argparse.Namespace, env: dict[str, str]): "-DORT_LIB_DIR=" + str(ort_lib_dir), "-DOGA_INCLUDE_DIR=" + str(oga_include_dir), "-DOGA_LIB_DIR=" + str(oga_lib_dir), + "-DUSE_GUIDANCE=" + ("ON" if args.use_guidance else "OFF"), ] ) diff --git a/src/constrained_logits_processor.cpp b/src/constrained_logits_processor.cpp index 73620fbd9d..42a7623fdc 100644 --- a/src/constrained_logits_processor.cpp +++ b/src/constrained_logits_processor.cpp @@ -261,7 +261,7 @@ std::unique_ptr CreateGuidanceLogitsProcessor(const #if USE_GUIDANCE return std::make_unique(state); #endif - Log("warning", "No supported ConstrainedLogitsProcessor found. e.g. to use guidance, build with use_guidance=true"); + Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true"); } return nullptr; }