diff --git a/.github/workflows/linux-cpu-x64-build.yml b/.github/workflows/linux-cpu-x64-build.yml index 424eccc716..50ee87b2a9 100644 --- a/.github/workflows/linux-cpu-x64-build.yml +++ b/.github/workflows/linux-cpu-x64-build.yml @@ -96,7 +96,7 @@ jobs: cmake --build --preset linux_gcc_cpu_release cmake --build --preset linux_gcc_cpu_release --target PyPackageBuild - - name: Install the python wheel and test dependencies + - name: Install the Python wheel and test dependencies run: | python3 -m pip install -r test/python/requirements.txt --user python3 -m pip install -r test/python/cpu/torch/requirements.txt --user @@ -110,9 +110,14 @@ jobs: ls -l ${{ github.workspace }}/build/cpu ls -l ${{ github.workspace }}/build/cpu/wheel + - name: Build the Java API and Run the Java Tests + run: | + set -e -x + python3 build.py --config=Release --build_dir build/cpu --build_java --parallel --cmake_generator "Ninja" + # This will also download all the test models to the test/test_models directory # These models are used by the python tests as well as C#, C++ and others. 
- - name: Run the python tests + - name: Run the Python tests run: | export ORTGENAI_LOG_ORT_LIB=1 python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models @@ -123,10 +128,19 @@ jobs: cd test/csharp dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/" /p:OrtLibDir="../../ort/lib/" --verbosity normal - - name: Build the Java API and Run the Java Tests + - name: Build the C# Examples run: | - set -e -x - python3 build.py --config=Release --build_dir build/cpu --build_java --parallel --cmake_generator "Ninja" + export ORTGENAI_LOG_ORT_LIB=1 + cd examples/csharp/ModelChat + dotnet build -c Release + cd ../ModelMM + dotnet build -c Release + + - name: Test the C# LLM Example with Tool Calling + run: | + export ORTGENAI_LOG_ORT_LIB=1 + python3 test/python/special_tokens.py -p test/test_models/qwen-2.5-0.5b/int4/cpu/tokenizer.json -s "" -e "" + ./examples/csharp/ModelChat/bin/Release/net8.0/ModelChat -m test/test_models/qwen-2.5-0.5b/int4/cpu/ -e cpu --response_format lark_grammar --tools_file test/test_models/tool-definitions/weather.json --tool_call_start "" --tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" --tool_output --non_interactive --verbose - name: Run tests run: | diff --git a/.github/workflows/linux-cpu-x64-nightly-build.yml b/.github/workflows/linux-cpu-x64-nightly-build.yml index 5621f7be71..6b4b89c010 100644 --- a/.github/workflows/linux-cpu-x64-nightly-build.yml +++ b/.github/workflows/linux-cpu-x64-nightly-build.yml @@ -66,7 +66,7 @@ jobs: - name: Run Q&A Example run: | python3 -m onnxruntime_genai.models.builder -i /data/ortgenai/pytorch/qwen2.5-0.5b-instruct -e cpu -p int4 -o ./example-models/qwen2.5-0.5b-instruct - python3 examples/python/model-qa.py -m ./example-models/qwen2.5-0.5b-instruct -e cpu --input_prompt "what is 10+4?" > output.log 2>&1 + python3 examples/python/model-qa.py -m ./example-models/qwen2.5-0.5b-instruct -e cpu --user_prompt "what is 10+4?" 
--non_interactive > output.log 2>&1 if cat output.log | grep -Eq "14|fourteen"; then echo "Result seems correct" else diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml index f689754c4b..e6e5f73a9a 100644 --- a/.github/workflows/linux-gpu-x64-build.yml +++ b/.github/workflows/linux-gpu-x64-build.yml @@ -112,7 +112,7 @@ jobs: --multiple_repos \ --repository onnxruntimecudabuildx64 - - name: Config with Cmake in Docker + - name: Config with CMake in Docker run: | set -e -x docker run \ @@ -125,7 +125,7 @@ jobs: -DMANYLINUX=ON \ -DPYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }} " - - name: Build with Cmake in Docker + - name: Build with CMake in Docker run: | set -e -x docker run \ @@ -136,7 +136,23 @@ jobs: bash -c " \ /usr/bin/cmake --build --preset linux_gcc_cuda_release && /usr/bin/cmake --build --preset linux_gcc_cuda_release --target PyPackageBuild" - - name: Install the onnxruntime-genai Python wheel and run python test + - name: Build the Java API and Run the Java Tests in Docker + run: | + set -e -x + docker run \ + --gpus all \ + --rm \ + --user 0 \ + --volume $GITHUB_WORKSPACE:/ort_genai_src \ + -w /ort_genai_src onnxruntimecudabuildx64 bash -c " \ + alias python3=${{ env.PYTHON_EXECUTABLE }} && \ + dnf -y update && dnf install -y python3.11-devel && dnf install -y python3-pip python3-setuptools python3-wheel && \ + ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/requirements.txt --user && \ + ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/torch/requirements.txt --user && \ + ${{ env.PYTHON_EXECUTABLE }} -m pip install -r test/python/cuda/ort/requirements.txt --user && \ + ${{ env.PYTHON_EXECUTABLE }} build.py --config=Release --build_dir build/cuda --build_java --parallel --cmake_generator Ninja --cmake_extra_defines PYTHON_EXECUTABLE=${{ env.PYTHON_EXECUTABLE }}" + + - name: Install the onnxruntime-genai Python wheel and run Python tests run: | echo "Installing the 
onnxruntime-genai Python wheel and running the Python tests" docker run \ diff --git a/.github/workflows/mac-cpu-arm64-build.yml b/.github/workflows/mac-cpu-arm64-build.yml index fc1c068494..127d164654 100644 --- a/.github/workflows/mac-cpu-arm64-build.yml +++ b/.github/workflows/mac-cpu-arm64-build.yml @@ -108,7 +108,7 @@ jobs: cmake --build --preset macos_arm64_cpu_release --target PyPackageBuild continue-on-error: false - - name: Install the python wheel and test dependencies + - name: Install the Python wheel and test dependencies run: | python3 -m venv genai-macos-venv source genai-macos-venv/bin/activate @@ -117,6 +117,12 @@ jobs: python3 -m pip install -r test/python/macos/ort/requirements.txt python3 -m pip install build/cpu/osx-arm64/wheel/onnxruntime_genai*.whl --no-deps + - name: Build the Java API and Run the Java Tests + run: | + set -e -x + source genai-macos-venv/bin/activate + python3 build.py --config=Release --build_dir build/cpu/osx-arm64 --build_java --parallel --cmake_generator "Unix Makefiles" --macos MacOSX --osx_arch arm64 --apple_deploy_target 12.0 --apple_sysroot macosx + - name: Remove the ort lib and header files run: | rm -rf ort @@ -130,7 +136,7 @@ jobs: # This will also download all the test models to the test/test_models directory # These models are used by the python tests as well as C#, C++ and others. 
- - name: Run the python tests + - name: Run the Python tests run: | source genai-macos-venv/bin/activate export HF_TOKEN="12345" @@ -144,11 +150,19 @@ jobs: cd test/csharp dotnet test /p:Configuration=Release /p:NativeBuildOutputDir="../../build/cpu/osx-arm64" --verbosity normal - - name: Build the Java API and Run the Java Tests + - name: Build the C# Examples run: | - set -e -x - source genai-macos-venv/bin/activate - python3 build.py --config=Release --build_dir build/cpu/osx-arm64 --build_java --parallel --cmake_generator "Unix Makefiles" --macos MacOSX --osx_arch arm64 --apple_deploy_target 12.0 --apple_sysroot macosx + export ORTGENAI_LOG_ORT_LIB=1 + cd examples/csharp/ModelChat + dotnet build -c Release + cd ../ModelMM + dotnet build -c Release + + - name: Test the C# LLM Example with Tool Calling + run: | + export ORTGENAI_LOG_ORT_LIB=1 + python3 test/python/special_tokens.py -p test/test_models/qwen-2.5-0.5b/int4/cpu/tokenizer.json -s "" -e "" + ./examples/csharp/ModelChat/bin/Release/net8.0/ModelChat -m test/test_models/qwen-2.5-0.5b/int4/cpu/ -e cpu --response_format lark_grammar --tools_file test/test_models/tool-definitions/weather.json --tool_call_start "" --tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" 
--tool_output --non_interactive --verbose - name: Run tests run: | diff --git a/.github/workflows/win-cpu-arm64-build.yml b/.github/workflows/win-cpu-arm64-build.yml index 057151daf4..41f05cdcc6 100644 --- a/.github/workflows/win-cpu-arm64-build.yml +++ b/.github/workflows/win-cpu-arm64-build.yml @@ -93,10 +93,16 @@ jobs: run: | # Uninstalling LLVM/Clang as it is no longer required and causes issues with numpy installation choco uninstall llvm --yes - python -m pip install "numpy<2" coloredlogs flatbuffers packaging protobuf sympy pytest + python -m pip install -r test\python\requirements.txt --user + python -m pip install -r test\python\cpu\torch\requirements.txt --user + python -m pip install -r test\python\cpu\ort\requirements.txt --user python -m pip install onnxruntime-qnn python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps + - name: Build the Java API and Run the Java Tests + run: | + python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + - name: Run the Python Tests run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" @@ -106,9 +112,17 @@ jobs: cd test\csharp dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib" - - name: Build the Java API and Run the Java Tests + - name: Build the C# Examples run: | - python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + cd examples\csharp\ModelChat + dotnet build -c Release + cd ..\ModelMM + dotnet build -c Release + + - name: Test the C# LLM Example with Tool Calling + run: | + python3 test\python\special_tokens.py -p test\test_models\qwen-2.5-0.5b\int4\cpu\tokenizer.json -s "" -e "" + .\examples\csharp\ModelChat\bin\Release\net8.0\ModelChat.exe -m test\test_models\qwen-2.5-0.5b\int4\cpu\ -e cpu --response_format lark_grammar --tools_file test\test_models\tool-definitions\weather.json --tool_call_start "" 
--tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" --tool_output --non_interactive --verbose - name: Verify Build Artifacts if: always() diff --git a/.github/workflows/win-cpu-x64-build.yml b/.github/workflows/win-cpu-x64-build.yml index 723f52c624..9f6069e6e8 100644 --- a/.github/workflows/win-cpu-x64-build.yml +++ b/.github/workflows/win-cpu-x64-build.yml @@ -102,13 +102,17 @@ jobs: cmake --build --preset windows_x64_cpu_release --parallel cmake --build --preset windows_x64_cpu_release --target PyPackageBuild - - name: Install the python wheel and test dependencies + - name: Install the Python wheel and test dependencies run: | python3 -m pip install -r test\python\requirements.txt --user python3 -m pip install -r test\python\cpu\torch\requirements.txt --user python3 -m pip install -r test\python\cpu\ort\requirements.txt --user python3 -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps + - name: Build the Java API and Run the Java Tests + run: | + python3 build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + - name: Run the Python Tests run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" @@ -118,9 +122,17 @@ jobs: cd test\csharp dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib" --verbosity normal - - name: Build the Java API and Run the Java Tests + - name: Build the C# Examples run: | - python3 build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + cd examples\csharp\ModelChat + dotnet build -c Release + cd ..\ModelMM + dotnet build -c Release + + - name: Test the C# LLM Example with Tool Calling + run: | + python3 test\python\special_tokens.py -p test\test_models\qwen-2.5-0.5b\int4\cpu\tokenizer.json -s "" -e "" + .\examples\csharp\ModelChat\bin\Release\net8.0\ModelChat.exe -m test\test_models\qwen-2.5-0.5b\int4\cpu\ -e cpu --response_format 
lark_grammar --tools_file test\test_models\tool-definitions\weather.json --tool_call_start "" --tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" --tool_output --non_interactive --verbose - name: Verify Build Artifacts if: always() diff --git a/.github/workflows/win-cuda-x64-build.yml b/.github/workflows/win-cuda-x64-build.yml index bd634fef5c..d036850864 100644 --- a/.github/workflows/win-cuda-x64-build.yml +++ b/.github/workflows/win-cuda-x64-build.yml @@ -98,6 +98,10 @@ jobs: python -m pip install -r test\python\cuda\ort\requirements.txt python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps + - name: Build the Java API and Run the Java Tests + run: | + python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + - name: Run the Python Tests run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e @@ -115,6 +119,18 @@ jobs: cd test\csharp dotnet test /p:Configuration=release /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib" + - name: Build the C# Examples + run: | + cd examples\csharp\ModelChat + dotnet build -c Release + cd ..\ModelMM + dotnet build -c Release + + - name: Test the C# LLM Example with Tool Calling + run: | + python test\python\special_tokens.py -p test\test_models\qwen-2.5-0.5b\int4\cpu\tokenizer.json -s "" -e "" + .\examples\csharp\ModelChat\bin\Release\net8.0\ModelChat.exe -m test\test_models\qwen-2.5-0.5b\int4\cpu\ -e cpu --response_format lark_grammar --tools_file test\test_models\tool-definitions\weather.json --tool_call_start "" --tool_call_end "" --user_prompt "What is the weather in Redmond, WA?" 
--tool_output --non_interactive --verbose + - name: Prepend CUDA to PATH and Run tests run: |- $env:PATH = "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin;" + $env:PATH diff --git a/.github/workflows/win-directml-x64-build.yml b/.github/workflows/win-directml-x64-build.yml index 6377573575..b477ae459a 100644 --- a/.github/workflows/win-directml-x64-build.yml +++ b/.github/workflows/win-directml-x64-build.yml @@ -114,6 +114,10 @@ jobs: python -m pip install -r test\python\directml\ort\requirements.txt python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps + - name: Build the Java API and Run the Java Tests + run: | + python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel + - name: Run the Python Tests run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e diff --git a/.gitignore b/.gitignore index e89b87511e..dcd6a9ebe8 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ models_outputs_cpu benchmark/python/output examples/python/genai_models examples/python/hf_cache -examples/csharp/HelloPhi/models +examples/csharp/ModelChat/models !test/test_models/hf-internal-testing/ !test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx diff --git a/.pipelines/stages/jobs/custom-nuget-packaging-job.yml b/.pipelines/stages/jobs/custom-nuget-packaging-job.yml index 3308363a1c..0152a6ef1b 100644 --- a/.pipelines/stages/jobs/custom-nuget-packaging-job.yml +++ b/.pipelines/stages/jobs/custom-nuget-packaging-job.yml @@ -167,4 +167,3 @@ jobs: inputs: targetPath: '$(Build.ArtifactStagingDirectory)\nuget' artifactName: $(genai_nuget_package_name) - \ No newline at end of file diff --git a/.pipelines/stages/jobs/nuget-validation-job.yml b/.pipelines/stages/jobs/nuget-validation-job.yml index 80df35a438..9d3472421e 100644 --- a/.pipelines/stages/jobs/nuget-validation-job.yml +++ b/.pipelines/stages/jobs/nuget-validation-job.yml @@ -98,6 +98,16 @@ jobs: ${{ else 
}}: value: 'cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4' + - name: prebuild_phi4_mm_model_folder + ${{ if eq(parameters.ep, 'cpu') }}: + value: 'cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4' + ${{ elseif eq(parameters.ep, 'cuda') }}: + value: 'gpu/gpu-int4-rtn-block-32' + ${{ elseif eq(parameters.ep, 'directml')}}: + value: 'gpu/gpu-int4-rtn-block-32' + ${{ else }}: + value: 'cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4' + - name: cuda_docker_image ${{ if eq(parameters.cuda_version, '11.8') }}: value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250714.2 @@ -143,14 +153,14 @@ jobs: HuggingFaceRepo: 'microsoft/Phi-3-mini-4k-instruct-onnx' LocalFolder: 'phi3-mini' RepoFolder: $(prebuild_phi3_mini_model_folder) - WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi' + WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/ModelChat' HuggingFaceToken: $(HF_TOKEN) os: ${{ parameters.os }} - template: steps/nuget-validation-step.yml parameters: - CsprojFolder: "examples/csharp/HelloPhi" - CsprojName: "HelloPhi" + CsprojFolder: "examples/csharp/ModelChat" + CsprojName: "ModelChat" CsprojConfiguration: $(csproj_configuration) LocalFolder: 'phi3-mini' ModelFolder: $(prebuild_phi3_mini_model_folder) @@ -160,14 +170,14 @@ jobs: HuggingFaceRepo: 'microsoft/Phi-3.5-vision-instruct-onnx' LocalFolder: 'phi3.5-vision' RepoFolder: $(prebuild_phi3_5_vision_model_folder) - WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi3V' + WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/ModelMM' HuggingFaceToken: $(HF_TOKEN) os: ${{ parameters.os }} - template: steps/nuget-validation-step.yml parameters: - CsprojFolder: "examples/csharp/HelloPhi3V" - CsprojName: "HelloPhi3V" + CsprojFolder: "examples/csharp/ModelMM" + CsprojName: "ModelMM" CsprojConfiguration: $(csproj_configuration) LocalFolder: 'phi3.5-vision' ModelFolder: 
$(prebuild_phi3_5_vision_model_folder) @@ -177,14 +187,14 @@ jobs: HuggingFaceRepo: 'microsoft/Phi-4-multimodal-instruct-onnx' LocalFolder: 'phi4-mm' RepoFolder: $(prebuild_phi4_mm_model_folder) - WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/HelloPhi4MM' + WorkingDirectory: '$(Build.Repository.LocalPath)/examples/csharp/ModelMM' HuggingFaceToken: $(HF_TOKEN) os: ${{ parameters.os }} - template: steps/nuget-validation-step.yml parameters: - CsprojFolder: "examples/csharp/HelloPhi4MM" - CsprojName: "HelloPhi4MM" + CsprojFolder: "examples/csharp/ModelMM" + CsprojName: "ModelMM" CsprojConfiguration: $(csproj_configuration) LocalFolder: 'phi4-mm' ModelFolder: $(prebuild_phi4_mm_model_folder) diff --git a/.pipelines/stages/jobs/py-validation-job.yml b/.pipelines/stages/jobs/py-validation-job.yml index 426be14ba2..1b825c094b 100644 --- a/.pipelines/stages/jobs/py-validation-job.yml +++ b/.pipelines/stages/jobs/py-validation-job.yml @@ -211,7 +211,7 @@ jobs: - template: steps/python-validation-step.yml parameters: PythonScriptFolder: "examples/python" - PythonScriptName: "phi4-mm.py" + PythonScriptName: "model-mm.py" LocalFolder: 'phi4-mm' ModelFolder: $(prebuild_phi4_mm_model_folder) diff --git a/.pipelines/stages/jobs/steps/nuget-validation-step.yml b/.pipelines/stages/jobs/steps/nuget-validation-step.yml index e5004545fd..c4b9a5682d 100644 --- a/.pipelines/stages/jobs/steps/nuget-validation-step.yml +++ b/.pipelines/stages/jobs/steps/nuget-validation-step.yml @@ -34,7 +34,7 @@ steps: Copy-Item -Force -Recurse -Verbose $(Build.BinariesDirectory)/nuget/* -Destination ${{ parameters.CsprojFolder }} cd ${{ parameters.CsprojFolder }} dotnet restore -r $(os)-$(arch) /property:Configuration=${{ parameters.CsprojConfiguration }} --source https://api.nuget.org/v3/index.json --source https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json --source $PWD --disable-parallel --verbosity detailed - dotnet run -r $(os)-$(arch) 
--configuration ${{ parameters.CsprojConfiguration }} --no-restore --verbosity normal -- -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non-interactive + dotnet run -r $(os)-$(arch) --configuration ${{ parameters.CsprojConfiguration }} --no-restore --verbosity normal -- -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non_interactive displayName: 'Run ${{ parameters.CsprojName }} With Artifact on Windows' workingDirectory: '$(Build.Repository.LocalPath)' condition: eq(variables['os'], 'win') @@ -49,7 +49,7 @@ steps: cd ${{ parameters.CsprojFolder }} dotnet restore -r $(os)-$(arch) /property:Configuration=${{ parameters.CsprojConfiguration }} --source https://api.nuget.org/v3/index.json --source https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json --source $PWD --disable-parallel --verbosity detailed dotnet build ./${{ parameters.CsprojName }}.csproj -r $(os)-$(arch) /property:Configuration=${{ parameters.CsprojConfiguration }} --no-restore --self-contained --verbosity normal - ls -l ./bin/${{ parameters.CsprojConfiguration }}/net6.0/$(os)-$(arch)/ + ls -l ./bin/${{ parameters.CsprojConfiguration }}/net8.0/$(os)-$(arch)/ displayName: 'Perform dotnet restore & build' workingDirectory: '$(Build.Repository.LocalPath)' condition: or(eq(variables['os'], 'linux'), eq(variables['os'], 'osx')) @@ -70,8 +70,8 @@ steps: bash -c " \ export ORTGENAI_LOG_ORT_LIB=1 && \ cd /ort_genai_src/${{ parameters.CsprojFolder }} && \ - chmod +x ./bin/Release_Cuda/net6.0/linux-x64/${{ parameters.CsprojName }} && \ - ./bin/Release_Cuda/net6.0/linux-x64/${{ parameters.CsprojName }} -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non-interactive" + chmod +x ./bin/Release_Cuda/net8.0/linux-x64/${{ parameters.CsprojName }} && \ + ./bin/Release_Cuda/net8.0/linux-x64/${{ parameters.CsprojName }} -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) 
--non_interactive" displayName: 'Run ${{ parameters.CsprojName }} With Artifact on Linux CUDA' workingDirectory: '$(Build.Repository.LocalPath)' @@ -80,7 +80,7 @@ steps: - bash: | export ORTGENAI_LOG_ORT_LIB=1 cd ${{ parameters.CsprojFolder }} - dotnet run -r $(os)-$(arch) --configuration ${{ parameters.CsprojConfiguration }} --no-build --verbosity normal -- -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non-interactive + dotnet run -r $(os)-$(arch) --configuration ${{ parameters.CsprojConfiguration }} --no-build --verbosity normal -- -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non_interactive displayName: 'Run ${{ parameters.CsprojName }} With Artifact on Linux/macOS CPU' workingDirectory: '$(Build.Repository.LocalPath)' condition: and(or(eq(variables['os'], 'linux'), eq(variables['os'], 'osx')), eq(variables['ep'], 'cpu')) diff --git a/.pipelines/stages/jobs/steps/python-validation-step.yml b/.pipelines/stages/jobs/steps/python-validation-step.yml index 48bbb6e691..7424541526 100644 --- a/.pipelines/stages/jobs/steps/python-validation-step.yml +++ b/.pipelines/stages/jobs/steps/python-validation-step.yml @@ -46,9 +46,9 @@ steps: python -m pip install --no-index --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name) if ("$(ep)" -eq "directml") { - python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e dml --non-interactive + python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e dml --non_interactive } else { - python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non-interactive + python ${{ parameters.PythonScriptName }} -m .\${{ parameters.LocalFolder }}\${{ parameters.ModelFolder }} -e $(ep) --non_interactive } displayName: 'Run ${{ parameters.PythonScriptName }} With Artifact on Windows' workingDirectory: 
'$(Build.Repository.LocalPath)' @@ -73,7 +73,7 @@ steps: $python_exe -m pip install -r /ort_genai_src/test/python/cuda/ort/requirements.txt && \ cd /ort_genai_src/${{ parameters.PythonScriptFolder }} && \ $python_exe -m pip install --no-index --find-links=/ort_genai_binary/wheel $(pip_package_name) && \ - $python_exe ${{ parameters.PythonScriptName }} -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non-interactive" + $python_exe ${{ parameters.PythonScriptName }} -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non_interactive" displayName: 'Run ${{ parameters.PythonScriptName }} With Artifact on Linux CUDA' workingDirectory: '$(Build.Repository.LocalPath)' @@ -92,7 +92,7 @@ steps: fi cd ${{ parameters.PythonScriptFolder }} python -m pip install --no-index --find-links=$(Build.BinariesDirectory)/wheel $(pip_package_name) - python ${{ parameters.PythonScriptName }} -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non-interactive + python ${{ parameters.PythonScriptName }} -m ./${{ parameters.LocalFolder }}/${{ parameters.ModelFolder }} -e $(ep) --non_interactive displayName: 'Run ${{ parameters.PythonScriptName }} With Artifact on Linux/macOS CPU' workingDirectory: '$(Build.Repository.LocalPath)' condition: and(or(eq(variables['os'], 'linux'), eq(variables['os'], 'osx')), eq(variables['ep'], 'cpu')) \ No newline at end of file diff --git a/README.md b/README.md index 29313d8feb..aceeaedf66 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ See [installation instructions](https://onnxruntime.ai/docs/genai/howto/install) model = og.Model('cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4') tokenizer = og.Tokenizer(model) - tokenizer_stream = tokenizer.create_stream() + stream = tokenizer.create_stream() # Set the max length to something sensible by default, # since otherwise it will be set to the entire context length @@ -81,7 +81,7 @@ See [installation 
instructions](https://onnxruntime.ai/docs/genai/howto/install) while not generator.is_done(): generator.generate_next_token() new_token = generator.get_next_tokens()[0] - print(tokenizer_stream.decode(new_token), end='', flush=True) + print(stream.decode(new_token), end='', flush=True) except KeyboardInterrupt: print(" --control+c pressed, aborting generation--") diff --git a/build.py b/build.py index c3b6e915ca..ff595232ce 100644 --- a/build.py +++ b/build.py @@ -775,16 +775,17 @@ def build_examples(args: argparse.Namespace, env: dict[str, str]): samples_to_build = [ "-DMODEL_QA=ON", "-DMODEL_CHAT=ON", - "-DMODEL_VISION=ON", - "-DPHI4-MM=ON", + "-DMODEL_MM=ON", "-DWHISPER=ON", ] - include_dir = REPO_ROOT / "src" - lib_dir = args.build_dir + ort_include_dir = REPO_ROOT / "ort" / "include" + ort_lib_dir = REPO_ROOT / "ort" / "lib" + oga_include_dir = REPO_ROOT / "src" + oga_lib_dir = args.build_dir if util.is_windows(): # On Windows, the library files are in a subdirectory named after the configuration (e.g. Debug, Release, etc.) - lib_dir = lib_dir / args.config + oga_lib_dir = oga_lib_dir / args.config cmake_command = ( [ @@ -798,8 +799,10 @@ def build_examples(args: argparse.Namespace, env: dict[str, str]): ] + samples_to_build + [ - "-DORT_GENAI_INCLUDE_DIR=" + str(include_dir), - "-DORT_GENAI_LIB_DIR=" + str(lib_dir), + "-DORT_INCLUDE_DIR=" + str(ort_include_dir), + "-DORT_LIB_DIR=" + str(ort_lib_dir), + "-DOGA_INCLUDE_DIR=" + str(oga_include_dir), + "-DOGA_LIB_DIR=" + str(oga_lib_dir), ] ) diff --git a/docs/ConstrainedDecoding.md b/docs/ConstrainedDecoding.md new file mode 100644 index 0000000000..b45d3f2110 --- /dev/null +++ b/docs/ConstrainedDecoding.md @@ -0,0 +1,10 @@ +## Constrained Decoding + +Constrained Decoding is useful when using function/tool calling as it helps in ensuring the output is in the correct format (i.e. ensures structured outputs). + +We have integrated [LLGuidance](https://github.com/guidance-ai/llguidance) for constrained decoding. 
There are three types of constrained decoding enabled right now: +1. Lark Grammar (Recommended): This option allows for regular output as well as function/tool output in JSON format. +2. JSON Schema: The output will be JSON that conforms to the schema, and it will be one of the functions/tools provided. +3. Regex: Use this option when the output must match a particular regular expression. + +To ensure that the function/tool calling works correctly with constrained decoding, you need to modify your tokenizer.json file. For each model that has its own tool calling token, the tool calling token's `special` attribute needs to be set to true. For example, Phi-4 mini uses the <|tool_call|> and <|/tool_call|> tokens so you should set the `special` attribute for them as `true` inside `tokenizer.json`. diff --git a/documents/DownloadModels.md b/docs/DownloadModels.md similarity index 88% rename from documents/DownloadModels.md rename to docs/DownloadModels.md index 3221b992f4..88f0342537 100644 --- a/documents/DownloadModels.md +++ b/docs/DownloadModels.md @@ -1,10 +1,7 @@ # Download Options for ONNX Runtime GenAI Models -This guide covers two easy ways to download models for use with ONNX Runtime GenAI: +This guide covers ways to download models for use with ONNX Runtime GenAI. -Using Foundry Local - -Using Hugging Face CLI ## Download via Foundry Local @@ -32,7 +29,7 @@ Using Hugging Face CLI huggingface-cli download --include /* --local-dir . ``` - For example, to download the Phi-4 mini instruct gpu model: + For example, to download the Phi-4 mini instruct generic-GPU model: ``` huggingface-cli download microsoft/Phi-4-mini-instruct-onnx --include gpu/* --local-dir . 
``` diff --git a/documents/Runtime_option.md b/docs/RuntimeOptions.md similarity index 78% rename from documents/Runtime_option.md rename to docs/RuntimeOptions.md index b8490c1cde..1c3c3c2ee1 100644 --- a/documents/Runtime_option.md +++ b/docs/RuntimeOptions.md @@ -1,6 +1,6 @@ # Runtime Options -This file will provide details on the usage of SetRuntimeOption API. It will list all the current key value pairs which can be used as an input for this API. +This file will provide details on the usage of the SetRuntimeOption API. It will list all the current key value pairs which can be used as an input for this API. ## Set Terminate diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index d6d514cb3b..226e88d994 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -3,49 +3,91 @@ cmake_minimum_required(VERSION 3.18.1) project(ortgenaiapp) set(CMAKE_CXX_STANDARD 20) +# Download and make available nlohmann/json +include(FetchContent) +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG v3.12.0 # Or update to latest release +) +FetchContent_MakeAvailable(nlohmann_json) + +# Download and make available CLI11 +include(FetchContent) +FetchContent_Declare( + CLI11 + GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git + GIT_TAG v2.6.1 # Or update to latest release +) +FetchContent_MakeAvailable(CLI11) + option(USE_CXX "Invoke the C++ example" ON) option(MODEL_CHAT "Build the Model Chat example" OFF) -option(MODEL_QA "Build the Model Q&A example without multi-turn prompting" OFF) -option(MODEL_VISION "Build the Model Vision example" OFF) -option(PHI4-MM "Build the Phi-4 mm example" OFF) +option(MODEL_QA "Build the Model Q&A example" OFF) +option(MODEL_MM "Build the Model Multimodal example" OFF) option(WHISPER "Build the Whisper example" OFF) if(USE_CXX) add_compile_definitions(USE_CXX) endif() +# Set expected library filenames if(WIN32) - set(ONNXRUNTIME_GENAI_LIB "onnxruntime-genai.dll") + 
set(ORT_LIB_FILE "onnxruntime.dll") + set(OGA_LIB_FILE "onnxruntime-genai.dll") elseif(APPLE) - set(ONNXRUNTIME_GENAI_LIB "libonnxruntime-genai.dylib") + set(ORT_LIB_FILE "libonnxruntime.dylib") + set(OGA_LIB_FILE "libonnxruntime-genai.dylib") elseif(CMAKE_SYSTEM_NAME MATCHES "AIX") - set(ONNXRUNTIME_GENAI_LIB "libonnxruntime-genai.a") + set(ORT_LIB_FILE "libonnxruntime.a") + set(OGA_LIB_FILE "libonnxruntime-genai.a") else() - set(ONNXRUNTIME_GENAI_LIB "libonnxruntime-genai.so") + set(ORT_LIB_FILE "libonnxruntime.so") + set(OGA_LIB_FILE "libonnxruntime-genai.so") endif() -# Set default library directory if not specified -if(NOT ORT_GENAI_LIB_DIR) - set(ORT_GENAI_LIB_DIR "${CMAKE_SOURCE_DIR}/lib") +# Set default variables to examples/c/include and examples/c/lib if not specified +if(NOT ORT_INCLUDE_DIR) + set(ORT_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include") +endif() +if(NOT OGA_INCLUDE_DIR) + set(OGA_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include") +endif() +if(NOT ORT_LIB_DIR) + set(ORT_LIB_DIR "${CMAKE_SOURCE_DIR}/lib") +endif() +if(NOT OGA_LIB_DIR) + set(OGA_LIB_DIR "${CMAKE_SOURCE_DIR}/lib") endif() -file(GLOB ort_genai_libs "${ORT_GENAI_LIB_DIR}/*") - -message(STATUS "ORT_GENAI_LIB_DIR: ${ORT_GENAI_LIB_DIR}") +# Store all library files in each directory +file(GLOB ort_libs "${ORT_LIB_DIR}/*") +file(GLOB oga_libs "${OGA_LIB_DIR}/*") function(prepare_executable executable) - target_link_directories(${executable} PRIVATE ${ORT_GENAI_LIB_DIR}) - target_link_libraries(${executable} PRIVATE ${ONNXRUNTIME_GENAI_LIB}) + # Link directory and library for ORT and ORT GenAI + target_link_directories(${executable} PRIVATE ${ORT_LIB_DIR}) + target_link_libraries(${executable} PRIVATE ${ORT_LIB_FILE}) + target_link_directories(${executable} PRIVATE ${OGA_LIB_DIR}) + target_link_libraries(${executable} PRIVATE ${OGA_LIB_FILE}) - if (ORT_GENAI_INCLUDE_DIR) - target_include_directories(${executable} PRIVATE ${ORT_GENAI_INCLUDE_DIR}) - else() - target_include_directories(${executable} 
PRIVATE ${CMAKE_SOURCE_DIR}/include) - endif() + # Add include directories for each executable + target_include_directories(${executable} PRIVATE ${ORT_INCLUDE_DIR}) + target_include_directories(${executable} PRIVATE ${OGA_INCLUDE_DIR}) + target_link_libraries(${executable} PUBLIC onnxruntime) target_link_libraries(${executable} PUBLIC onnxruntime-genai) - foreach(DEPENDENCY_FILE ${ort_genai_libs}) + foreach(DEPENDENCY_FILE ${ort_libs}) + if (NOT IS_DIRECTORY ${DEPENDENCY_FILE}) + add_custom_command( + TARGET ${executable} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DEPENDENCY_FILE} $ + ) + endif() + endforeach() + + foreach(DEPENDENCY_FILE ${oga_libs}) if (NOT IS_DIRECTORY ${DEPENDENCY_FILE}) add_custom_command( TARGET ${executable} POST_BUILD @@ -60,24 +102,27 @@ set(EXAMPLES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src) if(MODEL_CHAT) add_executable(model_chat ${EXAMPLES_SOURCE_DIR}/model_chat.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) prepare_executable(model_chat) + target_link_libraries(model_chat PRIVATE nlohmann_json::nlohmann_json) + target_link_libraries(model_chat PRIVATE CLI11::CLI11) endif() if(MODEL_QA) add_executable(model_qa ${EXAMPLES_SOURCE_DIR}/model_qa.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) prepare_executable(model_qa) + target_link_libraries(model_qa PRIVATE nlohmann_json::nlohmann_json) + target_link_libraries(model_qa PRIVATE CLI11::CLI11) endif() -if(MODEL_VISION) - add_executable(model_vision ${EXAMPLES_SOURCE_DIR}/model_vision.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) - prepare_executable(model_vision) -endif() - -if(PHI4-MM) - add_executable(phi4-mm ${CMAKE_SOURCE_DIR}/src/phi4-mm.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) - prepare_executable(phi4-mm) +if(MODEL_MM) + add_executable(model_mm ${EXAMPLES_SOURCE_DIR}/model_mm.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) + prepare_executable(model_mm) + target_link_libraries(model_mm PRIVATE nlohmann_json::nlohmann_json) + target_link_libraries(model_mm PRIVATE CLI11::CLI11) endif() if(WHISPER) - 
add_executable(whisper ${CMAKE_SOURCE_DIR}/src/whisper.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) + add_executable(whisper ${EXAMPLES_SOURCE_DIR}/whisper.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp) prepare_executable(whisper) + target_link_libraries(whisper PRIVATE nlohmann_json::nlohmann_json) + target_link_libraries(whisper PRIVATE CLI11::CLI11) endif() diff --git a/examples/c/README.md b/examples/c/README.md index a947b06493..2301ce37a9 100644 --- a/examples/c/README.md +++ b/examples/c/README.md @@ -1,21 +1,16 @@ -# ONNX Runtime GenAI C Example +# ONNX Runtime GenAI C/C++ Examples -> 📝 **Note:** The examples from the main branch of this repository are compatible with the binaries build from the same commit. Therefore, if using the example from `main`, ONNX Runtime GenAI needs to be built from source. If this is your scenario, just build the library and the examples will be auto built along with the library. -If this is not your scenario, please use prebuilt binaries from the release you're interested in and use the examples from the same version tag and follow the steps below. +> 📝 **Note:** The examples from the main branch of this repository are compatible with the binaries built from the same commit. Therefore, if using the example from `main`, ONNX Runtime GenAI needs to be built from source. If this is your scenario, just build the library and the examples will be auto built along with the library. If this is not your scenario, please use prebuilt binaries from the release you're interested in and use the examples from the same version tag and follow the steps below. -## Download the model +## Install ONNX Runtime GenAI -1. Download and install [foundry-local](https://github.com/microsoft/Foundry-Local/releases) -2. List available models: `foundry model list` -3. Download a model you would like to run. For example: `foundry model download Phi-4-generic-cpu` -4. Find out where the model is saved on disk: `foundry cache location` -5. 
Identify the path to the model on disk. For example: `C:\Users\\.foundry\Microsoft\Phi-4-generic-cpu\cpu-int4-rtn-block-32-acc-level-4` +Install the C headers according to the [installation instructions](https://onnxruntime.ai/docs/genai/howto/install) or [build from source](https://onnxruntime.ai/docs/genai/howto/build-from-source.html). -> 📝 **Note:** Foundry Local CLI is not available on Linux at the moment. Please download the model from a Windows or a macOS machine and copy it over to your Linux machine if you would like to run on Linux. +## Download a Model -For other options to download models, read through [our download options](https://github.com/microsoft/onnxruntime-genai/blob/main/documents/DownloadModels.md). +There are many places to obtain a model. Please read through [our download options](https://github.com/microsoft/onnxruntime-genai/blob/main/docs/DownloadModels.md). -## Build the C++ Example +## Build a C/C++ Example 1. Clone the repo: `git clone https://github.com/microsoft/onnxruntime-genai.git` - Use the relevant release tag that aligns with the version of the libraries you're planning to use. @@ -59,11 +54,65 @@ For other options to download models, read through [our download options](https: cmake --build build --parallel --config Debug ``` -## Run the sample +## Run an Example 1. On Windows: - - cd build\Debug - - .\model_qa.exe + +```powershell +# Prerequisite: navigate to the compiled binaries. +cd build\Debug +``` + +```powershell +# The `model-chat` script allows for multi-turn conversations. +.\model_chat.exe -m {path to model folder} -e {execution provider} +``` + +```powershell +# The `model-qa` script streams the output text token by token. +.\model_qa.exe -m {path to model folder} -e {execution provider} +``` + +```powershell +# The `model-mm` script works for multi-modal models and streams the output text token by token. +.\model_mm.exe -m {path to model folder} -e {execution provider} +``` + 2. 
On Linux and macOS: - - cd build - - ./model_qa + +```bash +# Prerequisite: navigate to the compiled binaries. +cd build +``` + +```bash +# The `model-chat` script allows for multi-turn conversations. +./model_chat -m {path to model folder} -e {execution provider} +``` + +```bash +# The `model-qa` script streams the output text token by token. +./model_qa -m {path to model folder} -e {execution provider} +``` + +```bash +# The `model-mm` script works for multi-modal models and streams the output text token by token. +./model_mm -m {path to model folder} -e {execution provider} +``` + +## Tool Calling + +Please read through [our constrained decoding](https://github.com/microsoft/onnxruntime-genai/blob/main/docs/ConstrainedDecoding.md) options to learn more. + +Here are some examples of how you can run the C/C++ examples with function/tool calling. + +```bash +# Using JSON Schema with only tool call output +./model_qa -m {path to model folder} -e {execution provider} --response_format json_schema --tools_file {path to json file} --tool_output --tool_call_start "{starting tool call token}" --tool_call_end "{ending tool call token}" + +# Using Lark Grammar with only tool call output +./model_mm -m {path to model folder} -e {execution provider} --response_format lark_grammar --tools_file {path to json file} --tool_output --tool_call_start "{starting tool call token}" --tool_call_end "{ending tool call token}" + +# Using Lark Grammar with text or tool call output +./model_chat -m {path to model folder} -e {execution provider} --response_format lark_grammar --tools_file {path to json file} --text_output --tool_output --tool_call_start "{starting tool call token}" --tool_call_end "{ending tool call token}" +``` diff --git a/examples/c/src/common.cpp b/examples/c/src/common.cpp index b91bfc3c81..15159e779a 100644 --- a/examples/c/src/common.cpp +++ b/examples/c/src/common.cpp @@ -1,9 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. -#include "common.h" #include +#include "common.h" + void Timing::RecordStartTimestamp() { assert(start_timestamp_.time_since_epoch().count() == 0); start_timestamp_ = Clock::now(); @@ -39,10 +40,6 @@ void Timing::Log(const int prompt_tokens_length, const int new_tokens_length) { std::cout << "-------------" << std::endl; } -bool FileExists(const char* path) { - return static_cast(std::ifstream(path)); -} - std::string Trim(const std::string& str) { const size_t first = str.find_first_not_of(' '); if (std::string::npos == first) { @@ -52,69 +49,642 @@ std::string Trim(const std::string& str) { return str.substr(first, (last - first + 1)); } -static void print_usage(int /*argc*/, char** argv) { - std::cerr << "usage: " << argv[0] << " [execution_provider] [ep_library_path]" << std::endl; - std::cerr << " model_path: [required] Path to the folder containing onnx models, genai_config.json, etc." << std::endl; - std::cerr << " execution_provider: [optional] Force use of a particular execution provider (e.g. \"cpu\", \"cuda\", \"NvTensorRtRtx\")" << std::endl; - std::cerr << " If not specified, EP / provider options specified in genai_config.json will be used." 
<< std::endl; - std::cerr << " ep_library_path: [optional] Path to execution provider DLL/SO for plug-in providers" << std::endl; - std::cerr << " Example: onnxruntime_providers_cuda.dll or onnxruntime_providers_tensorrt.dll" << std::endl; - std::cerr << std::endl; - std::cerr << "Examples:" << std::endl; - std::cerr << " " << argv[0] << " /path/to/model" << std::endl; - std::cerr << " " << argv[0] << " /path/to/model cuda" << std::endl; - std::cerr << " " << argv[0] << " /path/to/model cuda /path/to/onnxruntime_providers_cuda.dll" << std::endl; - std::cerr << " " << argv[0] << " /path/to/model NvTensorRtRtx /path/to/onnxruntime_providers_tensorrt.dll" << std::endl; -} - -bool parse_args(int argc, char** argv, std::string& model_path, std::string& ep, std::string* ep_library_path) { - if (argc < 2) { - print_usage(argc, argv); +// Define to_json and from_json for std::optional +// Must be done within nlohmann::adl_serializer and not as standalone methods +namespace nlohmann { +template +struct adl_serializer> { + static void to_json(nlohmann::ordered_json& j, const std::optional& opt) { + if (opt.has_value()) { + j = *opt; + } else { + j = nullptr; + } + } + static void from_json(const nlohmann::ordered_json& j, std::optional& opt) { + if (j.is_null()) { + opt = std::nullopt; + return; + } + opt = j.get(); + } +}; +} // namespace nlohmann + +void to_json(nlohmann::ordered_json& j, const ToolSchema& tool) { + j = nlohmann::ordered_json{{"description", tool.description}, {"type", tool.type}, {"properties", tool.properties}, {"required", tool.required}, {"additionalProperties", tool.additionalProperties}}; +} + +void from_json(const nlohmann::ordered_json& j, ToolSchema& tool) { + j.at("type").get_to(tool.type); + + if (j.contains("description")) { + j.at("description").get_to(tool.description); + } + + if (j.contains("properties")) { + tool.properties = j.at("properties"); + } + + if (j.contains("required")) { + j.at("required").get_to(tool.required); + } + + if 
(j.contains("additionalProperties")) { + j.at("additionalProperties").get_to(tool.additionalProperties); + } else { + tool.additionalProperties = false; + } +} + +void to_json(nlohmann::ordered_json& j, const JsonSchema& schema) { + j = nlohmann::ordered_json{{"x-guidance", schema.xGuidance}, {"type", schema.type}, {"items", schema.items}, {"minItems", schema.minItems}}; +} + +void from_json(const nlohmann::ordered_json& j, JsonSchema& schema) { + j.at("x-guidance").get_to(schema.xGuidance); + j.at("type").get_to(schema.type); + j.at("items").get_to(schema.items); + j.at("minItems").get_to(schema.minItems); +} + +void to_json(nlohmann::ordered_json& j, const FunctionDefinition& func) { + j = nlohmann::ordered_json{{"name", func.name}, {"description", func.description}, {"parameters", func.parameters}}; +} + +void from_json(const nlohmann::ordered_json& j, FunctionDefinition& func) { + j.at("name").get_to(func.name); + + if (j.contains("description")) { + j.at("description").get_to(func.description); + } + + if (j.contains("parameters")) { + func.parameters = j.at("parameters"); + } +} + +void to_json(nlohmann::ordered_json& j, const Tool& t) { + j = nlohmann::ordered_json{{"type", t.type}, {"function", t.function}}; +} + +void from_json(const nlohmann::ordered_json& j, Tool& t) { + j.at("type").get_to(t.type); + j.at("function").get_to(t.function); +} + +void to_json(nlohmann::ordered_json& j, const GeneratorParamsArgs& a) { + j = nlohmann::ordered_json{{"batch_size", a.batch_size}, {"num_beams", a.num_beams}, {"num_return_sequences", a.num_return_sequences}}; + // Add optional generator params if provided + if (a.chunk_size != 0) j["chunk_size"] = a.chunk_size; + if (a.do_sample) j["do_sample"] = a.do_sample.value(); + if (a.min_length) j["min_length"] = a.min_length.value(); + if (a.max_length) j["max_length"] = a.max_length.value(); + if (a.repetition_penalty) j["repetition_penalty"] = a.repetition_penalty.value(); + if (a.temperature) j["temperature"] = 
a.temperature.value(); + if (a.top_k) j["top_k"] = a.top_k.value(); + if (a.top_p) j["top_p"] = a.top_p.value(); +} + +void from_json(const nlohmann::ordered_json& j, GeneratorParamsArgs& a) { + if (j.contains("batch_size")) j.at("batch_size").get_to(a.batch_size); + if (j.contains("chunk_size")) j.at("chunk_size").get_to(a.chunk_size); + if (j.contains("do_sample")) j.at("do_sample").get_to(a.do_sample); + if (j.contains("min_length")) j.at("min_length").get_to(a.min_length); + if (j.contains("max_length")) j.at("max_length").get_to(a.max_length); + if (j.contains("num_beams")) j.at("num_beams").get_to(a.num_beams); + if (j.contains("num_return_sequences")) j.at("num_return_sequences").get_to(a.num_return_sequences); + if (j.contains("repetition_penalty")) j.at("repetition_penalty").get_to(a.repetition_penalty); + if (j.contains("temperature")) j.at("temperature").get_to(a.temperature); + if (j.contains("top_k")) j.at("top_k").get_to(a.top_k); + if (j.contains("top_p")) j.at("top_p").get_to(a.top_p); +} + +bool ParseArgs( + int argc, + char** argv, + GeneratorParamsArgs& generator_params_args, + GuidanceArgs& guidance_args, + std::string& model_path, + std::string& ep, + std::string& ep_path, + std::string& system_prompt, + std::string& user_prompt, + bool& verbose, + bool& debug, + bool& interactive, + bool& rewind, + std::vector& image_paths, + std::vector& audio_paths) { + CLI::App app{"Command-line arguments for ORT GenAI C/C++ examples"}; + argv = app.ensure_utf8(argv); + + std::string generator_params("Generator Params"); + std::string guidance("Guidance Arguments"); + + app.add_option("-b,--batch_size", generator_params_args.batch_size, "Batch size used during inference.")->group(generator_params); + app.add_option("-c,--chunk_size", generator_params_args.chunk_size, "Chunk size for prefill chunking during context processing (default: 0 = disabled, >0 = enabled)")->group(generator_params); + app.add_option("-s,--do_sample", generator_params_args.do_sample, 
"Do random sampling. When false, greedy or beam search are used to generate the output. Defaults to false")->group(generator_params); + app.add_option("-i,--min_length", generator_params_args.min_length, "Min number of tokens to generate including the prompt")->group(generator_params); + app.add_option("-l,--max_length", generator_params_args.max_length, "Max number of tokens to generate including the prompt")->group(generator_params); + app.add_option("-n,--num_beams", generator_params_args.num_beams, "Number of beams to create")->group(generator_params); + app.add_option("-q,--num_return_sequences", generator_params_args.num_return_sequences, "Number of return sequences to produce")->group(generator_params); + app.add_option("-r,--repetition_penalty", generator_params_args.repetition_penalty, "Repetition penalty to sample with")->group(generator_params); + app.add_option("-t,--temperature", generator_params_args.temperature, "Temperature to sample with")->group(generator_params); + app.add_option("-k,--top_k", generator_params_args.top_k, "Top k tokens to sample from")->group(generator_params); + app.add_option("-p,--top_p", generator_params_args.top_p, "Top p probability to sample with")->group(generator_params); + + app.add_option("--response_format", guidance_args.response_format, "Provide response format for the model")->group(guidance); + app.add_option("--tools_file", guidance_args.tools_file, "Path to file containing list of OpenAI-compatible tool definitions. Ex: test/test_models/tool-definitions/weather.json")->group(guidance); + app.add_flag("--text_output", guidance_args.text_output, "Produce a text response in the output")->group(guidance); + app.add_flag("--tool_output", guidance_args.tool_output, "Produce a tool call in the output")->group(guidance); + app.add_option("--tool_call_start", guidance_args.tool_call_start, "String representation of tool call start (ex: <|tool_call|>). 
Needs to be marked as special in tokenizer.json for guidance to work.")->group(guidance); + app.add_option("--tool_call_end", guidance_args.tool_call_end, "String representation of tool call end (ex: <|/tool_call|>). Needs to be marked as special in tokenizer.json for guidance to work.")->group(guidance); + + app.add_option("-m,--model_path", model_path, "ONNX model folder path (must contain genai_config.json and model.onnx)")->required(); + app.add_option("-e,--execution_provider", ep, "Execution provider to run the ONNX Runtime session with. Defaults to follow_config that uses the execution provider listed in the genai_config.json instead."); + app.add_flag("-v,--verbose", verbose, "Print verbose output and timing information. Defaults to false"); + app.add_flag("-d,--debug", debug, "Dump input and output tensors with debug mode. Defaults to false"); + + app.add_option("--ep_path", ep_path, "Path to execution provider DLL/SO for plug-in providers (ex: onnxruntime_providers_cuda.dll or onnxruntime_providers_tensorrt.dll)"); + app.add_option("--system_prompt", system_prompt, "System prompt to use for the model."); + app.add_option("--user_prompt", user_prompt, "User prompt to use for the model."); + app.add_flag("--rewind", rewind, "Rewind to the system prompt after each generation. Defaults to false. Only used in model_chat."); + app.add_flag_callback( + "--non_interactive", [&] { interactive = false; }, "Disable interactive mode"); + + app.add_option("--image_paths", image_paths, "Space-separated list of paths to images. Only used in model_mm.")->expected(0, -1); + app.add_option("--audio_paths", audio_paths, "Space-separated list of paths to audios. Only used in model_mm.")->expected(0, -1); + + try { + app.parse(argc, argv); + } catch (...) 
{ + std::cout << app.help() << std::endl; return false; } - model_path = argv[1]; - if (argc > 2) { - ep = argv[2]; + return true; +} + +void SetLogger(bool inputs, bool outputs) { + Oga::SetLogBool("enabled", true); + Oga::SetLogBool("model_input_values", inputs); + Oga::SetLogBool("model_output_values", outputs); +} + +void RegisterEP(const std::string& ep, const std::string& ep_path) { + if (ep_path.empty()) { + return; // No library path specified, skip registration + } + + std::cout << "Registering execution provider: " << ep_path << std::endl; + auto env = Ort::Env(); + if (ep.compare("cuda") == 0) { + env.RegisterExecutionProviderLibrary("CUDAExecutionProvider", std::filesystem::path(ep_path).c_str()); + } else if (ep.compare("NvTensorRtRtx") == 0) { + env.RegisterExecutionProviderLibrary("NvTensorRTRTXExecutionProvider", std::filesystem::path(ep_path).c_str()); } else { - ep = "follow_config"; + std::cout << "Warning: EP registration not supported for " << ep << std::endl; + std::cout << "Only 'cuda' and 'NvTensorRtRtx' support plug-in libraries." << std::endl; + return; + } + + std::cout << "Registered " << ep << " successfully!" 
<< std::endl; +} + +std::unique_ptr GetConfig(const std::string& path, const std::string& ep, const std::unordered_map& ep_options, GeneratorParamsArgs& search_options) { + auto config = OgaConfig::Create(path.c_str()); + if (ep.compare("follow_config") != 0) { + config->ClearProviders(); + if (ep.compare("cpu") != 0) { + std::cout << "Setting model to " << ep << std::endl; + config->AppendProvider(ep.c_str()); + } + + // Set any EP-specific options + for (const auto& [key, val] : ep_options) { + if (key.compare("enable_cuda_graph") == 0 && (ep.compare("cuda") == 0 || ep.compare("NvTensorRtRtx") == 0) && search_options.num_beams > 1) { + config->SetProviderOption(ep.c_str(), "enable_cuda_graph", "0"); + } else { + config->SetProviderOption(ep.c_str(), key.c_str(), val.c_str()); + } + } + } + + // Set any search-specific options that need to be known before constructing a Model object + // Otherwise they can be set with params.SetSearchOptions(search_options) + nlohmann::ordered_json j = search_options; + std::string s = j.dump(); + config->Overlay(s.c_str()); + return config; +} + +void SetSearchOptions(OgaGeneratorParams& generatorParams, GeneratorParamsArgs& args, bool verbose) { + std::vector opts; + if (args.batch_size) { + generatorParams.SetSearchOption("batch_size", args.batch_size); + opts.push_back("batch_size: " + std::to_string(args.batch_size)); + } + if (args.do_sample) { + generatorParams.SetSearchOptionBool("do_sample", args.do_sample.value()); + opts.push_back("do_sample: " + std::to_string(args.do_sample.value())); + } + if (args.min_length) { + generatorParams.SetSearchOption("min_length", args.min_length.value()); + opts.push_back("min_length: " + std::to_string(args.min_length.value())); + } + if (args.num_beams) { + generatorParams.SetSearchOption("num_beams", args.num_beams); + opts.push_back("num_beams: " + std::to_string(args.num_beams)); + } + if (args.num_return_sequences) { + generatorParams.SetSearchOption("num_return_sequences", 
args.num_return_sequences); + opts.push_back("num_return_sequences: " + std::to_string(args.num_return_sequences)); + } + if (args.repetition_penalty) { + generatorParams.SetSearchOption("repetition_penalty", args.repetition_penalty.value()); + opts.push_back("repetition_penalty: " + std::to_string(args.repetition_penalty.value())); + } + if (args.temperature) { + generatorParams.SetSearchOption("temperature", args.temperature.value()); + opts.push_back("temperature: " + std::to_string(args.temperature.value())); + } + if (args.top_k) { + generatorParams.SetSearchOption("top_k", args.top_k.value()); + opts.push_back("top_k: " + std::to_string(args.top_k.value())); + } + if (args.top_p) { + generatorParams.SetSearchOption("top_p", args.top_p.value()); + opts.push_back("top_p: " + std::to_string(args.top_p.value())); + } + if (verbose) { + std::cout << "GeneratorParams created: {"; + for (int i = 0; i < opts.size(); i++) { + std::cout << opts[i]; + if (i != opts.size() - 1) std::cout << ", "; + } + std::cout << "}" << std::endl; } - if (ep_library_path) { - if (argc > 3) { - *ep_library_path = argv[3]; +} + +std::string ApplyChatTemplate(const std::string& model_path, OgaTokenizer& tokenizer, const std::string& messages, bool add_generation_prompt, const std::string& tools) { + std::string template_str = ""; + std::filesystem::path jinja_path = std::filesystem::path(model_path) / "chat_template.jinja"; + if (std::filesystem::exists(jinja_path)) { + std::ifstream file(jinja_path, std::ios::binary); + if (file) { + std::ostringstream oss; + oss << file.rdbuf(); + template_str = oss.str(); } else { - *ep_library_path = ""; + // If the file exists but can't be opened, fall back to empty template. 
+ template_str.clear(); } } - return true; + + std::string prompt = std::string(tokenizer.ApplyChatTemplate(template_str.c_str(), messages.c_str(), tools.c_str(), add_generation_prompt)); + return prompt; } -void append_provider(OgaConfig& config, const std::string& provider) { - if (provider.compare("follow_config") != 0) { - config.ClearProviders(); - if (provider.compare("cpu") != 0) { - config.AppendProvider(provider.c_str()); - if (provider.compare("cuda") == 0) { - config.SetProviderOption(provider.c_str(), "enable_cuda_graph", "0"); - } +std::string GetUserPrompt(const std::string& prompt, bool interactive) { + std::string text; + + while (true) { + if (interactive) { + // If interactive mode is on + std::cout << "Prompt (Use quit() to exit):" << std::endl; + // Clear any cin error flags because of SIGINT + std::cin.clear(); + std::getline(std::cin, text); + } else { + // Use provided prompt (whether default or user-provided) + text = prompt; + } + + if (text.empty()) { + std::cout << "Empty input. Please enter a valid prompt." 
<< std::endl; + continue; // Skip to the next iteration if input is empty + } else { + break; + } + } + + return text; +} + +std::vector GetUserMediaPaths(const std::vector& media_paths, bool interactive, std::string& media_type) { + // Check media type + std::string media_type_lower = media_type; + std::transform(media_type_lower.begin(), media_type_lower.end(), media_type_lower.begin(), [](unsigned char c) { return std::tolower(c); }); + if (!(media_type == "audio" || media_type == "image")) { + throw std::invalid_argument("Media type must be 'image' or 'audio'"); + } + std::string media_type_capitalized = (char)std::toupper(media_type[0]) + media_type.substr(1); + + std::vector paths; + if (!media_paths.empty()) { + // If user-provided media paths + paths = media_paths; + } else if (interactive) { + // If interactive mode is on + std::string paths_str; + std::cout << media_type_capitalized << " Path (comma separated; leave empty if no " << media_type << "):" << std::endl; + std::getline(std::cin, paths_str); + + std::unique_ptr images; + for (size_t start = 0, end = 0; end < paths_str.size(); start = end + 1) { + end = paths_str.find(',', start); + paths.push_back(Trim(paths_str.substr(start, end - start))); + } + } + + paths.erase(std::remove_if(paths.begin(), paths.end(), [](const std::string& s) { return s.empty(); }), paths.end()); + for (const auto& path : paths) { + if (!std::filesystem::exists(path)) { + std::string error_message = media_type_capitalized + " file not found: " + path; + throw std::runtime_error(error_message); } + std::cout << "Using " << media_type << ": " << path << std::endl; } + + return paths; } -void register_provider_library(const std::string& provider, const std::string& library_path) { - if (library_path.empty()) { - return; // No library path specified, skip registration +std::tuple, int> GetUserImages(const std::vector& image_paths, bool interactive) { + std::string media_type = "image"; + std::vector paths = 
GetUserMediaPaths(image_paths, interactive, media_type); + if (paths.empty()) { + std::cout << "No " << media_type << " provided" << std::endl; + return std::make_tuple(nullptr, 0); + } + + std::vector paths_c; + for (const auto& path : paths) { + paths_c.push_back(path.c_str()); + } + + std::unique_ptr images = OgaImages::Load(paths_c); + return std::make_tuple(std::move(images), static_cast(paths.size())); +} + +std::tuple, int> GetUserAudios(const std::vector& audio_paths, bool interactive) { + std::string media_type = "audio"; + std::vector paths = GetUserMediaPaths(audio_paths, interactive, media_type); + if (paths.empty()) { + std::cout << "No " << media_type << " provided" << std::endl; + return std::make_tuple(nullptr, 0); + } + + std::vector paths_c; + for (const auto& path : paths) { + paths_c.push_back(path.c_str()); + } + + std::unique_ptr audios = OgaAudios::Load(paths_c); + return std::make_tuple(std::move(audios), static_cast(paths.size())); +} + +nlohmann::ordered_json GetUserContent(const std::string& model_type, int num_images, int num_audios, const std::string& prompt) { + nlohmann::ordered_json content_json; + + // Combine all image tags, audio tags, and text into one user content + std::string image_tags = "", audio_tags = "", content = ""; + if (model_type == "phi3v") { + // Phi-3 vision, Phi-3.5 vision + for (int i = 0; i < num_images; i++) { + image_tags += "<|image_" + std::to_string(i + 1) + "|>\\n"; + } + content = image_tags + prompt; + content_json = nlohmann::ordered_json(content); + + } else if (model_type == "phi4mm") { + // Phi-4 multimodal + for (int i = 0; i < num_images; i++) { + image_tags += "<|image_" + std::to_string(i + 1) + "|>\\n"; + } + for (int i = 0; i < num_audios; i++) { + audio_tags += "<|audio_" + std::to_string(i + 1) + "|>\\n"; + } + content = image_tags + audio_tags + prompt; + content_json = nlohmann::ordered_json(content); + + } else if (model_type == "qwen2_5_vl" || model_type == "fara") { + // Qwen-2.5 VL, 
Fara + for (int i = 0; i < num_images; i++) { + image_tags += "<|vision_start|><|image_pad|><|vision_end|>"; + } + content = image_tags + prompt; + content_json = nlohmann::ordered_json(content); + + } else { + // Gemma-3 style: structured content + content_json = nlohmann::ordered_json::array(); + + // Add N image blocks + for (int i = 0; i < num_images; i++) { + content_json.push_back(nlohmann::ordered_json::object({{"type", "image"}})); + } + + // Always add a text block (with the user prompt) + content_json.push_back(nlohmann::ordered_json::object({{"type", "text"}, {"text", prompt}})); + } + + return content_json; +} + +std::vector ToolsToSchemas(std::vector& tools) { + std::vector tool_schemas; + for (Tool tool : tools) { + std::unordered_map name; + name["const"] = tool.function.name; + + nlohmann::ordered_json properties = {}; + properties["name"] = name; + + bool tool_parameters_exist = tool.function.parameters.size() != 0; + if (tool_parameters_exist) { + nlohmann::ordered_json parameters = {}; + parameters["type"] = tool.function.parameters.contains("type") ? tool.function.parameters["type"] : "object"; + nlohmann::ordered_json empty_map = {}; + parameters["properties"] = tool.function.parameters.contains("properties") ? tool.function.parameters["properties"] : empty_map; + std::vector empty_list; + parameters["required"] = tool.function.parameters.contains("required") ? tool.function.parameters["required"].get>() : empty_list; + + properties["parameters"] = parameters; + } + + ToolSchema tool_schema; + tool_schema.description = tool.function.description; + tool_schema.type = "object"; + tool_schema.properties = properties; + tool_schema.required = tool_parameters_exist ? 
std::vector{"name", "parameters"} : std::vector{"name"}; + tool_schema.additionalProperties = false; + + tool_schemas.push_back(tool_schema); } + return tool_schemas; +} + +std::string GetJsonSchema(std::vector& tools, bool tool_output) { + auto schemas = ToolsToSchemas(tools); + + nlohmann::ordered_json x_guidance = {}; + x_guidance["whitespace_flexible"] = false; + x_guidance["key_separator"] = ": "; + x_guidance["item_separator"] = ", "; + + std::unordered_map> items; + items["anyOf"] = schemas; + + JsonSchema json_schema; + json_schema.xGuidance = x_guidance; + json_schema.type = "array"; + json_schema.items = items; + json_schema.minItems = tool_output ? 1 : 0; + + // Serialize JSON schema to string + nlohmann::ordered_json j = json_schema; + std::string s = j.dump(); + return s; +} + +std::string GetLarkGrammar(std::vector& tools, bool text_output, bool tool_output, const std::string& tool_call_start, const std::string& tool_call_end) { + bool known_tool_call_ids = tool_call_start != "" && tool_call_end != ""; + std::string call_type = known_tool_call_ids ? 
"toolcall" : "functioncall"; + + std::vector rows; + std::string start_row; + if (text_output && !tool_output) { + start_row = "start: TEXT"; + } else if (!text_output && tool_output) { + start_row = "start: " + call_type; + } else if (text_output && tool_output) { + start_row = "start: TEXT | " + call_type; + } else { + throw new std::runtime_error("At least one of 'text_output' and 'tool_output' must be true"); + } + rows.push_back(start_row); + + if (text_output) { + std::string text_row = "TEXT: /[^{<](.|\\n)*/"; + rows.push_back(text_row); + } + + if (tool_output) { + std::string schema = GetJsonSchema(tools, tool_output); + if (known_tool_call_ids) { + std::string tool_row = "toolcall: " + tool_call_start + " functioncall " + tool_call_end; + rows.push_back(tool_row); + } + + std::string func_row = "functioncall: %json " + schema; + rows.push_back(func_row); + } + + std::string grammar = ""; + for (int i = 0; i < rows.size(); i++) { + grammar += rows[i]; + if (i != rows.size() - 1) grammar += "\n"; + } + return grammar; +} + +std::vector ToTool(std::vector& tool_defs) { + std::vector tools; + for (const auto& tool_def : tool_defs) { + Tool tool = tool_def.get(); + tools.push_back(tool); + } + return tools; +} - std::cout << "Registering execution provider library: " << library_path << std::endl; +std::tuple GetGuidance( + const std::string& response_format, + const std::string& filepath, + const std::string& tools_str, + std::vector* tools, + bool text_output, + bool tool_output, + const std::string& tool_call_start, + const std::string& tool_call_end) { + std::string guidance_type = ""; + std::string guidance_data = ""; + std::vector all_tools; - if (provider.compare("cuda") == 0) { - OgaRegisterExecutionProviderLibrary("CUDAExecutionProvider", library_path.c_str()); - std::cout << "Successfully registered CUDAExecutionProvider from " << library_path << std::endl; - } else if (provider.compare("NvTensorRtRtx") == 0) { - 
OgaRegisterExecutionProviderLibrary("NvTensorRTRTXExecutionProvider", library_path.c_str()); - std::cout << "Successfully registered NvTensorRTRTXExecutionProvider from " << library_path << std::endl; + // Get list of tools from a range of sources (filepath, JSON-serialized string, in-memory) + if (tool_output) { + if (std::filesystem::exists(filepath)) { + std::string json_str; + std::ifstream file(filepath, std::ios::binary); + if (file) { + std::ostringstream oss; + oss << file.rdbuf(); + json_str = oss.str(); + } + if (json_str.empty()) { + throw new std::runtime_error("Error: JSON file is empty."); + } + + nlohmann::ordered_json j = nlohmann::ordered_json::parse(json_str); + if (j.empty()) { + throw new std::runtime_error("Error: Tools did not de-serialize correctly"); + } + + std::vector defs; + defs.reserve(j.size()); + for (const auto& item : j) { + defs.push_back(item); + } + all_tools = ToTool(defs); + } else if (!tools_str.empty()) { + nlohmann::ordered_json j = nlohmann::ordered_json::parse(tools_str); + if (j.empty()) { + throw new std::runtime_error("Error: Tools did not de-serialize correctly"); + } + + std::vector defs; + defs.reserve(j.size()); + for (const auto& item : j) { + defs.push_back(item); + } + all_tools = ToTool(defs); + } else if (tools && !tools->empty()) { + try { + all_tools = ToTool(*tools); + } catch (...) 
{ + throw new std::runtime_error("Could not convert tools from vector to vector"); + } + } else { + throw new std::runtime_error("Error: Please provide the list of tools through a file, JSON-serialized string, or a list of tools"); + } + + if (all_tools.empty()) { + throw new std::runtime_error("Error: Could not obtain a list of tools in memory"); + } + } + + if (response_format == "text" || response_format == "lark_grammar") { + if (response_format == "text") { + bool right_settings = text_output && !tool_output; + if (!right_settings) { + throw new std::runtime_error("Error: A response format of 'text' requires text_output = true and tool_output = false"); + } + } + + guidance_type = "lark_grammar"; + guidance_data = GetLarkGrammar(all_tools, text_output, tool_output, tool_call_start, tool_call_end); + } else if (response_format == "json_schema" || response_format == "json_object") { + bool right_settings = tool_output && !text_output; + if (!right_settings) { + throw new std::runtime_error("Error: A response format of 'json_schema' or 'json_object' requires text_output = false and tool_output = true"); + } + + guidance_type = "json_schema"; + guidance_data = GetJsonSchema(all_tools, tool_output); } else { - std::cerr << "Warning: Provider library registration not supported for provider '" << provider << "'" << std::endl; - std::cerr << " Only 'cuda' and 'NvTensorRtRtx' support plug-in libraries." << std::endl; + throw new std::runtime_error("Error: Invalid response format provided"); } -} \ No newline at end of file + + nlohmann::ordered_json j = all_tools; + std::string s = j.dump(); + return std::make_tuple(guidance_type, guidance_data, s); +} diff --git a/examples/c/src/common.h b/examples/c/src/common.h index 3322a4de9e..b161acfaac 100644 --- a/examples/c/src/common.h +++ b/examples/c/src/common.h @@ -2,12 +2,20 @@ // Licensed under the MIT License. 
#pragma once + #include #include -#include -#include #include +#include #include +#include +#include +#include +#include + +#include +#include +#include "onnxruntime_cxx_api.h" #include "ort_genai.h" using Clock = std::chrono::high_resolution_clock; @@ -19,9 +27,7 @@ class Timing { public: Timing(const Timing&) = delete; Timing& operator=(const Timing&) = delete; - Timing() = default; - ~Timing() = default; void RecordStartTimestamp(); @@ -35,20 +41,367 @@ class Timing { TimePoint end_timestamp_; }; -bool FileExists(const char* path); - +/** + * @brief Trim user-provided filepath + * + * @param str Filepath to trim + * + * @return Trimmed filepath + */ std::string Trim(const std::string& str); -// Returns true if model_path, ep, and ep_library_path were able to be set from user cmd-line args. -// Returns false if insufficient cmd-line arguments were passed. -// Note: ep will be set to "follow_config" if user only gives model_path -// Note: ep_library_path will be empty if not provided (or if nullptr is passed) -bool parse_args(int /*argc*/, char** argv, std::string& model_path, std::string& ep, std::string* ep_library_path = nullptr); +/** + * @brief A class for defining a tool in a JSON schema compatible way + */ +struct ToolSchema { + std::string description; + std::string type; + nlohmann::ordered_json properties; + std::vector required; + bool additionalProperties; +}; + +/** + * @brief Convert ToolSchema to JSON + * + * @param j JSON object + * @param tool ToolSchema object + * + * @return None + */ +void to_json(nlohmann::ordered_json& j, const ToolSchema& tool); + +/** + * @brief Convert JSON to ToolSchema + * + * @param j JSON object + * @param tool ToolSchema object + * + * @return None + */ +void from_json(const nlohmann::ordered_json& j, ToolSchema& tool); + +/** + * @brief A class for defining a JSON schema for guidance + */ +struct JsonSchema { + nlohmann::ordered_json xGuidance; + std::string type; + std::unordered_map> items; + int minItems; +}; + 
+/** + * @brief Convert JsonSchema to JSON + * + * @param j JSON object + * @param schema JsonSchema object + * + * @return None + */ +void to_json(nlohmann::ordered_json& j, const JsonSchema& schema); + +/** + * @brief Convert JSON to JsonSchema + * + * @param j JSON object + * @param schema JsonSchema object + * + * @return None + */ +void from_json(const nlohmann::ordered_json& j, JsonSchema& schema); + +/** + * @brief A class for defining a function in an OpenAI-compatible way + */ +struct FunctionDefinition { + std::string name; + std::string description; + nlohmann::ordered_json parameters; +}; + +/** + * @brief Convert FunctionDefinition to JSON + * + * @param j JSON object + * @param func FunctionDefinition object + * + * @return None + */ +void to_json(nlohmann::ordered_json& j, const FunctionDefinition& func); + +/** + * @brief Convert JSON to FunctionDefinition + * + * @param j JSON object + * @param func FunctionDefinition object + * + * @return None + */ +void from_json(const nlohmann::ordered_json& j, FunctionDefinition& func); + +/** + * @brief A class for defining a tool in an OpenAI-compatible way + */ +struct Tool { + std::string type; + FunctionDefinition function; +}; + +/** + * @brief Convert Tool to JSON + * + * @param j JSON object + * @param t Tool object + * + * @return None + */ +void to_json(nlohmann::ordered_json& j, const Tool& t); + +/** + * @brief Convert JSON to Tool + * + * @param j JSON object + * @param t Tool object + * + * @return None + */ +void from_json(const nlohmann::ordered_json& j, Tool& t); + +/** + * @brief A class for holding parsed values for generator params + */ +struct GeneratorParamsArgs { + int batch_size = 1; + int chunk_size = 0; + std::optional do_sample; + std::optional min_length; + std::optional max_length; + int num_beams = 1; + int num_return_sequences = 1; + std::optional repetition_penalty; + std::optional temperature; + std::optional top_k; + std::optional top_p; +}; + +/** + * @brief Convert 
GeneratorParamsArgs to JSON + * + * @param j JSON object + * @param a Args object + * + * @return None + */ +void to_json(nlohmann::ordered_json& j, const GeneratorParamsArgs& a); + +/** + * @brief Convert JSON to GeneratorParamsArgs + * + * @param j JSON object + * @param a Args object + * + * @return None + */ +void from_json(const nlohmann::ordered_json& j, GeneratorParamsArgs& a); + +/** + * @brief A class for holding parsed values for guidance + */ +struct GuidanceArgs { + std::string response_format = ""; + std::string tools_file = ""; + bool text_output = false; + bool tool_output = false; + std::string tool_call_start = ""; + std::string tool_call_end = ""; +}; + +/** + * @brief Parse command-line arguments from user + * + * @param argc Number of command-line arguments provided + * @param argv Contents of command-line arguments provided + * @param generator_params_args Struct to hold args for generation params + * @param guidance_args Struct to hold args for guidance + * @param model_path Path to model folder containing GenAI config + * @param ep Name of execution provider to set + * @param ep_path Path to execution provider to register + * @param system_prompt System prompt to use for the model + * @param user_prompt User prompt to use for the model + * @param verbose Use verbose logging + * @param debug Use debug mode to dump input and output tensors + * @param interactive Run in interactive mode + * @param rewind Rewind to the system prompt after each generation + * @param image_paths File paths to images + * @param audio_paths File paths to audios + * + * @return true if command-line arguments can be parsed, else false + */ +bool ParseArgs(int argc, char** argv, GeneratorParamsArgs& generator_params_args, GuidanceArgs& guidance_args, std::string& model_path, std::string& ep, std::string& ep_path, std::string& system_prompt, std::string& user_prompt, bool& verbose, bool& debug, bool& interactive, bool& rewind, std::vector& image_paths, std::vector& 
audio_paths); + +/** + * @brief Set log options inside ORT GenAI + * + * @param inputs Dump inputs to the model in the console + * @param outputs Dump outputs to the model in the console + * + * @return None + */ +void SetLogger(bool inputs = true, bool outputs = true); + +/** + * @brief Register execution provider if path is provided + * + * @param ep Name of execution provider + * @param ep_path Path to execution provider to register + * + * @return None + */ +void RegisterEP(const std::string& ep, const std::string& ep_path); + +/** + * @brief Get OgaConfig object and set EP-specific and search-specific options inside it + * + * @param path Path to model folder containing GenAI config + * @param ep Name of execution provider to set + * @param ep_options Map of EP-specific option names and their values + * @param search_options Map of search-specific option names and their values + * + * @return ORT GenAI config object with all options set + */ +std::unique_ptr GetConfig(const std::string& path, const std::string& ep, const std::unordered_map& ep_options, GeneratorParamsArgs& search_options); + +/** + * @brief Set search options for a generator's params during decoding + * + * @param generatorParams Generator params object to set on + * @param args Arguments provided by user + * @param verbose Use verbose logging + * + * @return None + */ +void SetSearchOptions(OgaGeneratorParams& generatorParams, GeneratorParamsArgs& args, bool verbose); + +/** + * @brief Apply the chat template with various fallback options + * + * @param model_path Path to folder containing model + * @param tokenizer Tokenizer object to use + * @param messages String-encoded list of messages + * @param add_generation_prompt Add tokens to indicate the start of the AI's response + * @param tools String-encoded list of tools + * + * @return Prompt to encode + */ +std::string ApplyChatTemplate(const std::string& model_path, OgaTokenizer& tokenizer, const std::string& messages, bool 
add_generation_prompt, const std::string& tools = ""); + +/** + * @brief Get prompt for 'user' role in chat template + * + * @param prompt Provided prompt + * @param interactive Interactive mode (otherwise uses either user-provided prompt or default) + * + * @return Prompt to use + */ +std::string GetUserPrompt(const std::string& prompt, bool interactive); + +/** + * @brief Get paths to media for user + * + * @param media_paths User-provided media paths + * @param interactive Interactive mode (otherwise uses either user-provided media paths or default) + * @param media_type The media type being obtained + * + * @return all media filepaths to read and encode + */ +std::vector GetUserMediaPaths(const std::vector& media_paths, bool interactive, const std::string& media_type); + +/** + * @brief Get images for user + * + * @param image_paths User-provided image paths + * @param interactive Interactive mode (otherwise uses either user-provided image paths or default) + * + * @return (all images, number of images) as a tuple + */ +std::tuple, int> GetUserImages(const std::vector& image_paths, bool interactive); + +/** + * @brief Get audios for user + * + * @param audio_paths User-provided audio paths + * @param interactive Interactive mode (otherwise uses either user-provided audio paths or default) + * + * @return (all audios, number of audios) as a tuple + */ +std::tuple, int> GetUserAudios(const std::vector& audio_paths, bool interactive); + +/** + * @brief Get content for 'user' role in chat template + * + * @param model_type Model type inside ORT GenAI + * @param num_images Number of images + * @param num_audios Number of audios + * @param prompt User prompt + * + * @return JSON-encoded combined content for 'user' role + */ +nlohmann::ordered_json GetUserContent(const std::string& model_type, int num_images, int num_audios, const std::string& prompt); + +/** + * @brief Convert a list of tools to a list of tool schemas + * + * @param tools List of OpenAI-compatible 
tools + * + * @return List of JSON schema compatible tools + */ +std::vector ToolsToSchemas(std::vector& tools); + +/** + * @brief Create a JSON schema from a list of tools + * + * @param tools List of OpenAI-compatible tools + * @param tool_output Output can have a tool call + * + * @return JSON schema as a JSON-compatible string + */ +std::string GetJsonSchema(std::vector& tools, bool tool_output); + +/** + * @brief Create a LARK grammar from a list of tools + * + * @param tools List of OpenAI-compatible tools + * @param text_output Output can have text + * @param tool_output Output can have a tool call + * @param tool_call_start String representation of tool call starting token + * @param tool_call_end String representation of tool call ending token + * + * @return LARK grammar as a string + */ +std::string GetLarkGrammar(std::vector& tools, bool text_output, bool tool_output, const std::string& tool_call_start, const std::string& tool_call_end); -// Append provider / options to config. 
-// This is a no-op if provider=="follow_config" -void append_provider(OgaConfig& config, const std::string& provider); +/** + * @brief Convert a JSON-deserialized object of tools to a list of Tool objects + * + * @param tool_defs JSON-deserialized object containing OpenAI-compatible tool definitions + * + * @return List of Tool objects + */ +std::vector ToTool(std::vector& tool_defs); -// Register execution provider library if specified -// This enables plug-in provider support for CUDA and NvTensorRT -void register_provider_library(const std::string& provider, const std::string& library_path); \ No newline at end of file +/** + * @brief Create a grammar to use with LLGuidance + * + * @param response_format Type of format requested + * @param filepath Path to file containing OpenAI-compatible tool definitions + * @param tools_str JSON-serialized string containing OpenAI-compatible tool definitions + * @param tools List of OpenAI-compatible tools defined in memory + * @param text_output Output can have text + * @param tool_output Output can have a tool call + * @param tool_call_start String representation of tool call starting token + * @param tool_call_end String representation of tool call ending token + * + * @return (grammar type, grammar data, tools) as a tuple of strings + */ +std::tuple GetGuidance(const std::string& response_format = "", const std::string& filepath = "", const std::string& tools_str = "", std::vector* tools = nullptr, bool text_output = true, bool tool_output = false, const std::string& tool_call_start = "", const std::string& tool_call_end = ""); diff --git a/examples/c/src/model_chat.cpp b/examples/c/src/model_chat.cpp index b4725d67e0..37a537fb40 100644 --- a/examples/c/src/model_chat.cpp +++ b/examples/c/src/model_chat.cpp @@ -1,18 +1,19 @@ +// ----------------------------------------------------------------------------------------------- // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
- -#include -#include -#include - -#include "ort_genai.h" -#include "common.h" - +// // C++ API Example for Model Chat // This example demonstrates how to use the C++ API of the ONNX Runtime GenAI library // to perform model chat tasks (i.e. continuous decoding). It includes functionalities // to create a model, tokenizer, and generator, and to handle user input for generating // responses based on prompts. +// ----------------------------------------------------------------------------------------------- + +#include +#include +#include + +#include "common.h" OgaGenerator* g_generator = nullptr; @@ -24,76 +25,139 @@ void TerminateGeneration(int signum) { g_generator->SetRuntimeOption("terminate_session", "1"); } -void CXX_API(const char* model_path, const char* execution_provider, const char* ep_library_path) { - // Register execution provider library if specified (for plug-in providers) - std::string provider(execution_provider); - std::string library_path(ep_library_path); - register_provider_library(provider, library_path); - - std::cout << "Creating config..." << std::endl; - auto config = OgaConfig::Create(model_path); - - append_provider(*config, provider); - - std::cout << "Creating model..." << std::endl; +void CXX_API( + GeneratorParamsArgs& generator_params_args, + GuidanceArgs& guidance_args, + const std::string& model_path, + const std::string& ep, + const std::string& ep_path, + const std::string& system_prompt, + const std::string& user_prompt, + bool verbose, + bool debug, + bool interactive, + bool rewind) { + if (debug) SetLogger(); + RegisterEP(ep, ep_path); + + if (verbose) std::cout << "Creating config..." << std::endl; + std::unordered_map ep_options; + auto config = GetConfig(model_path, ep, ep_options, generator_params_args); + + if (verbose) std::cout << "Creating model..." << std::endl; auto model = OgaModel::Create(*config); - std::cout << "Creating tokenizer..." << std::endl; + if (verbose) std::cout << "Creating tokenizer..." 
<< std::endl; auto tokenizer = OgaTokenizer::Create(*model); - auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer); + auto stream = OgaTokenizerStream::Create(*tokenizer); + // Set search options for generator params auto params = OgaGeneratorParams::Create(*model); - params->SetSearchOption("max_length", 1024); + SetSearchOptions(*params, generator_params_args, verbose); + + // Create system message + nlohmann::ordered_json message = nlohmann::ordered_json::array(); + message.push_back({{"role", "system"}, {"content", system_prompt}}); + + // Get and set guidance info if requested + std::string guidance_type, guidance_data, tools; + if (!guidance_args.response_format.empty()) { + std::cout << "Make sure your tool call start id and tool call end id are marked as special in tokenizer.json" << std::endl; + std::tie(guidance_type, guidance_data, tools) = GetGuidance( + guidance_args.response_format, + guidance_args.tools_file, + "", // tools_str + nullptr, // tools + guidance_args.text_output, + guidance_args.tool_output, + guidance_args.tool_call_start, + guidance_args.tool_call_end); + message[0]["tools"] = tools; + + params->SetGuidance(guidance_type.c_str(), guidance_data.c_str()); + if (verbose) { + std::cout << std::endl; + std::cout << "Guidance type is: " << guidance_type << std::endl; + std::cout << "Guidance data is: \n" + << guidance_data << std::endl; + std::cout << std::endl; + } + } + // Create generator auto generator = OgaGenerator::Create(*model, *params); g_generator = generator.get(); // Store the current generator for termination + if (verbose) std::cout << "Generator created" << std::endl; - // Define System Prompt - std::string system_prompt = "You are a helpful AI assistant."; + // Apply chat template + std::string prompt; + try { + bool add_generation_prompt = false; + prompt = ApplyChatTemplate(model_path, *tokenizer, message.dump(), add_generation_prompt, tools); + } catch (...) 
{ + prompt = system_prompt; + } + if (verbose) std::cout << "System prompt: " << prompt << "\n" + << std::endl; + + // Encode system prompt and append tokens to model + auto sequences = OgaSequences::Create(); + tokenizer->Encode(prompt.c_str(), *sequences); + const int prompt_tokens_length = sequences->SequenceCount(0); + generator->AppendTokenSequences(*sequences); + // Keep asking for input prompts in a loop while (true) { - signal(SIGINT, TerminateGeneration); + // Get user prompt std::string text; - std::cout << "Prompt: (Use quit() to exit) Or (To terminate current output generation, press Ctrl+C)" << std::endl; - // Clear Any cin error flags because of SIGINT - std::cin.clear(); - std::getline(std::cin, text); - - if (text.empty()) { - std::cout << "Empty input. Please enter a valid prompt." << std::endl; - continue; // Skip to the next iteration if input is empty - } else if (text == "quit()") { - break; // Exit the loop + + if (interactive) { + std::cout << "Prompt (Use quit() to exit):" << std::endl; + // Clear any cin error flags because of SIGINT + std::cin.clear(); + std::getline(std::cin, text); + + if (text.empty()) { + std::cout << "Empty input. Please enter a valid prompt." 
<< std::endl; + continue; // Skip to the next iteration if input is empty + } else if (text == "quit()") { + break; // Exit the loop + } + } else { + text = user_prompt; } - const std::string messages = R"( - [ - { - "role": "system", - "content": ")" + system_prompt + - R"(" - }, - { - "role": "user", - "content": ")" + text + R"(" - } - ] - )"; - system_prompt.clear(); // Clear the system prompt to avoid reusing it in the next iteration - std::string prompt = std::string(tokenizer->ApplyChatTemplate("", messages.c_str(), "", true)); + signal(SIGINT, TerminateGeneration); + // Start timings bool is_first_token = true; Timing timing; timing.RecordStartTimestamp(); - auto sequences = OgaSequences::Create(); - tokenizer->Encode(prompt.c_str(), *sequences); + // Create user message + message = nlohmann::ordered_json::array(); + message.push_back({{"role", "user"}, {"content", text}}); - std::cout << "Generating response..." << std::endl; - generator->SetRuntimeOption("terminate_session", "0"); + // Apply chat template + try { + bool add_generation_prompt = true; + prompt = ApplyChatTemplate(model_path, *tokenizer, message.dump(), add_generation_prompt); + } catch (...) { + prompt = text; + } + if (verbose) std::cout << "User prompt: " << prompt << "\n" + << std::endl; + + // Encode user prompt and append tokens to model + sequences = OgaSequences::Create(); + tokenizer->Encode(prompt.c_str(), *sequences); generator->AppendTokenSequences(*sequences); - const auto current_token_count = generator->GetSequenceCount(0); + // Run generation loop + if (verbose) std::cout << "Running generation loop..." 
<< std::endl; + std::cout << std::endl; + std::cout << "Output: "; + const auto current_token_count = generator->GetSequenceCount(0); try { while (!generator->IsDone()) { generator->GenerateNextToken(); @@ -104,40 +168,62 @@ void CXX_API(const char* model_path, const char* execution_provider, const char* } const auto new_token = generator->GetNextTokens()[0]; - std::cout << tokenizer_stream->Decode(new_token) << std::flush; + std::cout << stream->Decode(new_token) << std::flush; } } catch (const std::exception& e) { - std::cout << "\n\033[31mTerminating generation: " << e.what() << "\033[0m" << std::endl; + std::cout << "\n" + << "Terminating generation: " << e.what() << std::endl; generator->RewindTo(current_token_count); // Rewind to the last valid state } - timing.RecordEndTimestamp(); - const int prompt_tokens_length = sequences->SequenceCount(0); + const int new_tokens_length = generator->GetSequenceCount(0) - prompt_tokens_length; timing.Log(prompt_tokens_length, new_tokens_length); - for (int i = 0; i < 3; ++i) - std::cout << std::endl; + std::cout << "\n\n" + << std::endl; + if (!interactive) break; + + // Rewind the generator to the system prompt. This will erase all the chat history with the model. 
+ if (rewind) generator->RewindTo(prompt_tokens_length); } } int main(int argc, char** argv) { - std::string model_path, ep, ep_library_path; - if (!parse_args(argc, argv, model_path, ep, &ep_library_path)) { + // Get command-line args + GeneratorParamsArgs generator_params_args; + GuidanceArgs guidance_args; + std::string model_path, ep = "follow_config", ep_path = "", system_prompt = "You are a helpful AI assistant.", user_prompt = "What color is the sky?"; + bool verbose = false, debug = false, interactive = true, rewind = false; + std::vector image_paths; + std::vector audio_paths; + if (!ParseArgs(argc, argv, generator_params_args, guidance_args, model_path, ep, ep_path, system_prompt, user_prompt, verbose, debug, interactive, rewind, image_paths, audio_paths)) { return -1; } // Responsible for cleaning up the library during shutdown OgaHandle handle; - std::cout << "---------------------------" << std::endl; + std::cout << "----------------------------" << std::endl; std::cout << "Hello, ORT GenAI Model Chat!" 
<< std::endl; - std::cout << "---------------------------" << std::endl; + std::cout << "----------------------------" << std::endl; + + std::cout << "Model path: " << model_path << std::endl; + std::cout << "Execution provider: " << ep << std::endl; + if (!ep_path.empty()) std::cout << "Execution provider path: " << ep_path << std::endl; + std::cout << "System prompt: " << system_prompt << std::endl; + if (!interactive) std::cout << "User prompt: " << user_prompt << std::endl; + std::cout << "Verbose: " << verbose << std::endl; + std::cout << "Debug: " << debug << std::endl; + std::cout << "Interactive: " << interactive << std::endl; + std::cout << "Rewind: " << rewind << std::endl; + std::cout << "--------------------------" << std::endl; + std::cout << std::endl; try { - CXX_API(model_path.c_str(), ep.c_str(), ep_library_path.c_str()); + CXX_API(generator_params_args, guidance_args, model_path, ep, ep_path, system_prompt, user_prompt, verbose, debug, interactive, rewind); } catch (const std::exception& e) { - std::cerr << "\033[31mError: " << e.what() << "\033[0m" << std::endl; + std::cerr << "Error: " << e.what() << std::endl; return -1; } diff --git a/examples/c/src/model_mm.cpp b/examples/c/src/model_mm.cpp new file mode 100644 index 0000000000..e01393c41a --- /dev/null +++ b/examples/c/src/model_mm.cpp @@ -0,0 +1,222 @@ +// ----------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// C++ API Example for Model Question-Answering +// This example demonstrates how to use the C++ API of the ONNX Runtime GenAI library +// to perform model question-answering tasks. It includes functionalities to create a model, +// tokenizer, and generator, and to handle user input for generating responses based on prompts. 
+// ----------------------------------------------------------------------------------------------- + +#include +#include +#include + +#include "common.h" + +OgaGenerator* g_generator = nullptr; + +void TerminateGeneration(int signum) { + if (g_generator == nullptr) { + return; + } + g_generator->SetRuntimeOption("terminate_session", "1"); +} + +void CXX_API( + GeneratorParamsArgs& generator_params_args, + GuidanceArgs& guidance_args, + const std::string& model_path, + const std::string& ep, + const std::string& ep_path, + const std::vector& image_paths, + const std::vector& audio_paths, + const std::string& system_prompt, + const std::string& user_prompt, + bool verbose, + bool debug, + bool interactive) { + if (debug) SetLogger(); + RegisterEP(ep, ep_path); + + if (verbose) std::cout << "Creating config..." << std::endl; + std::unordered_map ep_options; + auto config = GetConfig(model_path, ep, ep_options, generator_params_args); + + if (verbose) std::cout << "Creating model..." << std::endl; + auto model = OgaModel::Create(*config); + + if (verbose) std::cout << "Creating tokenizer..." << std::endl; + auto tokenizer = OgaTokenizer::Create(*model); + auto stream = OgaTokenizerStream::Create(*tokenizer); + + if (verbose) std::cout << "Creating processor..." 
<< std::endl; + auto processor = OgaMultiModalProcessor::Create(*model); + + // Create running list of messages + std::vector input_list; + nlohmann::ordered_json system_message = nlohmann::ordered_json{{"role", "system"}, {"content", system_prompt}}; + input_list.push_back(system_message); + + // Get and set guidance info if requested + std::string guidance_type, guidance_data, tools; + if (!guidance_args.response_format.empty()) { + std::cout << "Make sure your tool call start id and tool call end id are marked as special in tokenizer.json" << std::endl; + std::tie(guidance_type, guidance_data, tools) = GetGuidance( + guidance_args.response_format, + guidance_args.tools_file, + "", // tools_str + nullptr, // tools + guidance_args.text_output, + guidance_args.tool_output, + guidance_args.tool_call_start, + guidance_args.tool_call_end); + + input_list[0]["tools"] = tools; + } + + // Keep asking for input prompts in a loop + while (true) { + // Get images + std::unique_ptr images; + int num_images; + std::tie(images, num_images) = GetUserImages(image_paths, interactive); + + // Get audios + std::unique_ptr audios; + int num_audios; + std::tie(audios, num_audios) = GetUserAudios(audio_paths, interactive); + + // Get user prompt + std::string text = GetUserPrompt(user_prompt, interactive); + signal(SIGINT, TerminateGeneration); + if (text == "quit()") { + break; // Exit the loop + } + + // Construct user content based on inputs + auto type = model->GetType(); + nlohmann::ordered_json user_content = GetUserContent(std::string(type), num_images, num_audios, text); + + // Add user message to list of messages + nlohmann::ordered_json user_message = nlohmann::ordered_json{{"role", "user"}, {"content", user_content}}; + input_list.push_back(user_message); + nlohmann::ordered_json j = input_list; + std::string messages = j.dump(); + + // Start timings + bool is_first_token = true; + Timing timing; + timing.RecordStartTimestamp(); + + // Initialize generator params + auto 
params = OgaGeneratorParams::Create(*model); + SetSearchOptions(*params, generator_params_args, verbose); + + // Initialize guidance info + if (!guidance_args.response_format.empty()) { + params->SetGuidance(guidance_type.c_str(), guidance_data.c_str()); + if (verbose) { + std::cout << std::endl; + std::cout << "Guidance type is: " << guidance_type << std::endl; + std::cout << "Guidance data is: \n" + << guidance_data << std::endl; + std::cout << std::endl; + } + } + + // Create generator + auto generator = OgaGenerator::Create(*model, *params); + g_generator = generator.get(); // Store the current generator for termination + if (verbose) std::cout << "Generator created" << std::endl; + + // Apply chat template + std::string prompt; + try { + bool add_generation_prompt = true; + prompt = ApplyChatTemplate(model_path, *tokenizer, messages, add_generation_prompt, tools); + } catch (...) { + prompt = text; + } + if (verbose) std::cout << "Prompt: " << prompt << "\n" + << std::endl; + + // Encode combined system + user prompt and append inputs to model + auto input_tensors = processor->ProcessImagesAndAudios(prompt.c_str(), images.get(), audios.get()); + generator->SetInputs(*input_tensors); + const int prompt_tokens_length = generator->GetSequenceCount(0); + + // Run generation loop + if (verbose) std::cout << "Running generation loop..." 
<< std::endl; + std::cout << std::endl; + std::cout << "Output: "; + try { + while (!generator->IsDone()) { + generator->GenerateNextToken(); + + if (is_first_token) { + timing.RecordFirstTokenTimestamp(); + is_first_token = false; + } + + const auto new_token = generator->GetNextTokens()[0]; + std::cout << stream->Decode(new_token) << std::flush; + } + } catch (const std::exception& e) { + std::cout << "\n" + << "Terminating generation: " << e.what() << std::endl; + } + timing.RecordEndTimestamp(); + + // Clear the generator after use + g_generator = nullptr; + + // Remove user message from list of messages + input_list.pop_back(); + + const int new_tokens_length = generator->GetSequenceCount(0) - prompt_tokens_length; + timing.Log(prompt_tokens_length, new_tokens_length); + + std::cout << "\n\n\n"; + if (!interactive) break; + } +} + +int main(int argc, char** argv) { + // Get command-line args + GeneratorParamsArgs generator_params_args; + GuidanceArgs guidance_args; + std::string model_path, ep = "follow_config", ep_path = "", system_prompt = "You are a helpful AI assistant.", user_prompt = "What color is the sky?"; + bool verbose = false, debug = false, interactive = true, rewind = true; + std::vector image_paths; + std::vector audio_paths; + if (!ParseArgs(argc, argv, generator_params_args, guidance_args, model_path, ep, ep_path, system_prompt, user_prompt, verbose, debug, interactive, rewind, image_paths, audio_paths)) { + return -1; + } + + // Responsible for cleaning up the library during shutdown + OgaHandle handle; + + std::cout << "--------------------------" << std::endl; + std::cout << "Hello, ORT GenAI Model-MM!" 
<< std::endl; + std::cout << "--------------------------" << std::endl; + + std::cout << "Model path: " << model_path << std::endl; + std::cout << "Execution provider: " << ep << std::endl; + if (!ep_path.empty()) std::cout << "Execution provider path: " << ep_path << std::endl; + std::cout << "System prompt: " << system_prompt << std::endl; + if (!interactive) std::cout << "User prompt: " << user_prompt << std::endl; + std::cout << "Verbose: " << verbose << std::endl; + std::cout << "Interactive: " << interactive << std::endl; + std::cout << "--------------------------" << std::endl; + std::cout << std::endl; + + try { + CXX_API(generator_params_args, guidance_args, model_path, ep, ep_path, image_paths, audio_paths, system_prompt, user_prompt, verbose, debug, interactive); + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return -1; + } + + return 0; +} \ No newline at end of file diff --git a/examples/c/src/model_qa.cpp b/examples/c/src/model_qa.cpp index 22a459f02e..25b39dfce7 100644 --- a/examples/c/src/model_qa.cpp +++ b/examples/c/src/model_qa.cpp @@ -1,18 +1,19 @@ +// ----------------------------------------------------------------------------------------------- // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// +// C++ API Example for Model Question-Answering +// This example demonstrates how to use the C++ API of the ONNX Runtime GenAI library +// to perform model question-answering tasks. It includes functionalities to create a model, +// tokenizer, and generator, and to handle user input for generating responses based on prompts. 
+// ----------------------------------------------------------------------------------------------- +#include #include #include -#include -#include "ort_genai.h" #include "common.h" -// C++ API Example for Model Question-Answering -// This example demonstrates how to use the C++ API of the ONNX Runtime GenAI library -// to perform model question-answering tasks. It includes functionalities to create a model, -// tokenizer, and generator, and to handle user input for generating responses based on prompts. - OgaGenerator* g_generator = nullptr; void TerminateGeneration(int signum) { @@ -22,69 +23,114 @@ void TerminateGeneration(int signum) { g_generator->SetRuntimeOption("terminate_session", "1"); } -void CXX_API(const char* model_path, const char* execution_provider, const char* ep_library_path) { - // Register execution provider library if specified (for plug-in providers) - std::string provider(execution_provider); - std::string library_path(ep_library_path); - register_provider_library(provider, library_path); - - std::cout << "Creating config..." << std::endl; - auto config = OgaConfig::Create(model_path); - - append_provider(*config, provider); - - std::cout << "Creating model..." << std::endl; +void CXX_API( + GeneratorParamsArgs& generator_params_args, + GuidanceArgs& guidance_args, + const std::string& model_path, + const std::string& ep, + const std::string& ep_path, + const std::string& system_prompt, + const std::string& user_prompt, + bool verbose, + bool debug, + bool interactive) { + if (debug) SetLogger(); + RegisterEP(ep, ep_path); + + if (verbose) std::cout << "Creating config..." << std::endl; + std::unordered_map ep_options; + auto config = GetConfig(model_path, ep, ep_options, generator_params_args); + + if (verbose) std::cout << "Creating model..." << std::endl; auto model = OgaModel::Create(*config); - std::cout << "Creating tokenizer..." << std::endl; + if (verbose) std::cout << "Creating tokenizer..." 
<< std::endl; auto tokenizer = OgaTokenizer::Create(*model); - auto tokenizer_stream = OgaTokenizerStream::Create(*tokenizer); + auto stream = OgaTokenizerStream::Create(*tokenizer); + + // Create running list of messages + std::vector input_list; + nlohmann::ordered_json system_message = nlohmann::ordered_json{{"role", "system"}, {"content", system_prompt}}; + input_list.push_back(system_message); + + // Get and set guidance info if requested + std::string guidance_type, guidance_data, tools; + if (!guidance_args.response_format.empty()) { + std::cout << "Make sure your tool call start id and tool call end id are marked as special in tokenizer.json" << std::endl; + std::tie(guidance_type, guidance_data, tools) = GetGuidance( + guidance_args.response_format, + guidance_args.tools_file, + "", // tools_str + nullptr, // tools + guidance_args.text_output, + guidance_args.tool_output, + guidance_args.tool_call_start, + guidance_args.tool_call_end); + + input_list[0]["tools"] = tools; + } + // Keep asking for input prompts in a loop while (true) { - std::string text; - std::cout << "Prompt: (Use quit() to exit) Or (To terminate current output generation, press Ctrl+C)" << std::endl; - // Clear Any cin error flags because of SIGINT - std::cin.clear(); - std::getline(std::cin, text); - - if (text.empty()) { - std::cout << "Empty input. Please enter a valid prompt." << std::endl; - continue; // Skip to the next iteration if input is empty - } else if (text == "quit()") { + // Get user prompt + std::string text = GetUserPrompt(user_prompt, interactive); + signal(SIGINT, TerminateGeneration); + if (text == "quit()") { break; // Exit the loop } - signal(SIGINT, TerminateGeneration); - - const std::string messages = R"( - [ - { - "role": "system", - "content": "You are a helpful AI assistant." 
- }, - { - "role": "user", - "content": ")" + text + R"(" - } - ] - )"; - const std::string prompt = std::string(tokenizer->ApplyChatTemplate("", messages.c_str(), "", true)); + // Add user message to list of messages + nlohmann::ordered_json user_message = nlohmann::ordered_json{{"role", "user"}, {"content", text}}; + input_list.push_back(user_message); + nlohmann::ordered_json j = input_list; + std::string messages = j.dump(); + // Start timings bool is_first_token = true; Timing timing; timing.RecordStartTimestamp(); - auto sequences = OgaSequences::Create(); - tokenizer->Encode(prompt.c_str(), *sequences); - - std::cout << "Generating response..." << std::endl; - + // Initialize generator params auto params = OgaGeneratorParams::Create(*model); - params->SetSearchOption("max_length", 1024); + SetSearchOptions(*params, generator_params_args, verbose); + + // Initialize guidance info + if (!guidance_args.response_format.empty()) { + params->SetGuidance(guidance_type.c_str(), guidance_data.c_str()); + if (verbose) { + std::cout << std::endl; + std::cout << "Guidance type is: " << guidance_type << std::endl; + std::cout << "Guidance data is: \n" + << guidance_data << std::endl; + std::cout << std::endl; + } + } + + // Create generator auto generator = OgaGenerator::Create(*model, *params); g_generator = generator.get(); // Store the current generator for termination + if (verbose) std::cout << "Generator created" << std::endl; + + // Apply chat template + std::string prompt; + try { + bool add_generation_prompt = true; + prompt = ApplyChatTemplate(model_path, *tokenizer, messages, add_generation_prompt, tools); + } catch (...) 
{ + prompt = text; + } + if (verbose) std::cout << "Prompt: " << prompt << "\n" + << std::endl; + + // Encode combined system + user prompt and append tokens to model + auto sequences = OgaSequences::Create(); + tokenizer->Encode(prompt.c_str(), *sequences); generator->AppendTokenSequences(*sequences); + // Run generation loop + if (verbose) std::cout << "Running generation loop..." << std::endl; + std::cout << std::endl; + std::cout << "Output: "; try { while (!generator->IsDone()) { generator->GenerateNextToken(); @@ -95,40 +141,60 @@ void CXX_API(const char* model_path, const char* execution_provider, const char* } const auto new_token = generator->GetNextTokens()[0]; - std::cout << tokenizer_stream->Decode(new_token) << std::flush; + std::cout << stream->Decode(new_token) << std::flush; } } catch (const std::exception& e) { - std::cout << "\n\033[31mTerminating generation: " << e.what() << "\033[0m" << std::endl; + std::cout << "\n" + << "Terminating generation: " << e.what() << std::endl; } - timing.RecordEndTimestamp(); + + // Clear the generator after use + g_generator = nullptr; + + // Remove user message from list of messages + input_list.pop_back(); + const int prompt_tokens_length = sequences->SequenceCount(0); const int new_tokens_length = generator->GetSequenceCount(0) - prompt_tokens_length; timing.Log(prompt_tokens_length, new_tokens_length); - for (int i = 0; i < 3; ++i) - std::cout << std::endl; - - g_generator = nullptr; // Clear the generator after use + std::cout << "\n\n\n"; + if (!interactive) break; } } int main(int argc, char** argv) { - std::string model_path, ep, ep_library_path; - if (!parse_args(argc, argv, model_path, ep, &ep_library_path)) { + // Get command-line args + GeneratorParamsArgs generator_params_args; + GuidanceArgs guidance_args; + std::string model_path, ep = "follow_config", ep_path = "", system_prompt = "You are a helpful AI assistant.", user_prompt = "What color is the sky?"; + bool verbose = false, debug = false, 
interactive = true, rewind = true; + std::vector image_paths; + std::vector audio_paths; + if (!ParseArgs(argc, argv, generator_params_args, guidance_args, model_path, ep, ep_path, system_prompt, user_prompt, verbose, debug, interactive, rewind, image_paths, audio_paths)) { return -1; } // Responsible for cleaning up the library during shutdown OgaHandle handle; - std::cout << "-------------------------" << std::endl; + std::cout << "--------------------------" << std::endl; std::cout << "Hello, ORT GenAI Model-QA!" << std::endl; - std::cout << "-------------------------" << std::endl; + std::cout << "--------------------------" << std::endl; + + std::cout << "Model path: " << model_path << std::endl; + std::cout << "Execution provider: " << ep << std::endl; + if (!ep_path.empty()) std::cout << "Execution provider path: " << ep_path << std::endl; + std::cout << "System prompt: " << system_prompt << std::endl; + if (!interactive) std::cout << "User prompt: " << user_prompt << std::endl; + std::cout << "Verbose: " << verbose << std::endl; + std::cout << "Interactive: " << interactive << std::endl; + std::cout << "--------------------------" << std::endl; + std::cout << std::endl; - std::cout << "C++ API" << std::endl; try { - CXX_API(model_path.c_str(), ep.c_str(), ep_library_path.c_str()); + CXX_API(generator_params_args, guidance_args, model_path, ep, ep_path, system_prompt, user_prompt, verbose, debug, interactive); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; return -1; diff --git a/examples/c/src/model_vision.cpp b/examples/c/src/model_vision.cpp deleted file mode 100644 index a4afe40bc0..0000000000 --- a/examples/c/src/model_vision.cpp +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#include -#include -#include "common.h" -#include "ort_genai.h" - -// C++ API Example - -void CXX_API(const char* model_path, const char* execution_provider, const char* ep_library_path) { - // Register execution provider library if specified (for plug-in providers) - std::string provider(execution_provider); - std::string library_path(ep_library_path); - register_provider_library(provider, library_path); - - std::cout << "Creating config..." << std::endl; - auto config = OgaConfig::Create(model_path); - - append_provider(*config, provider); - - std::cout << "Creating model..." << std::endl; - auto model = OgaModel::Create(*config); - - std::cout << "Creating multimodal processor..." << std::endl; - auto processor = OgaMultiModalProcessor::Create(*model); - - auto tokenizer = OgaTokenizer::Create(*model); - - auto stream = OgaTokenizerStream::Create(*processor); - - while (true) { - std::string image_paths_str; - std::cout << "Image Path (comma separated; leave empty if no image):" << std::endl; - std::getline(std::cin, image_paths_str); - std::unique_ptr images; - std::vector image_paths; - for (size_t start = 0, end = 0; end < image_paths_str.size(); start = end + 1) { - end = image_paths_str.find(',', start); - image_paths.push_back(Trim(image_paths_str.substr(start, end - start))); - } - if (image_paths.empty()) { - std::cout << "No image provided" << std::endl; - } else { - std::cout << "Loading images..." << std::endl; - for (const auto& image_path : image_paths) { - if (!FileExists(image_path.c_str())) { - throw std::runtime_error(std::string("Image file not found: ") + image_path); - } - } - std::vector image_paths_c; - for (const auto& image_path : image_paths) image_paths_c.push_back(image_path.c_str()); - images = OgaImages::Load(image_paths_c); - } - - std::string text; - std::cout << "Prompt: " << std::endl; - std::getline(std::cin, text); - - // Construct messages string with special tokens for ApplyChatTemplate. 
- - // Note: The Phi-3 Vision chat template expects content to be string, whereas in - // Gemma-3-like models, content type is supported, so we handle these differently. - - std::string messages; - if (std::string(model->GetType()) == "phi3v") { - // Phi-3 Vision-style multimodal usage with image tags - std::string content; - for (size_t i = 0; i < image_paths.size(); ++i) - content += "<|image_" + std::to_string(i + 1) + "|>\\n"; - content += text; - messages = R"([{"role": "user", "content": ")" + content + R"("}])"; - } else { - // Gemma-style multimodal usage with content type - const std::string image_content = R"({ "type": "image" })"; - std::string content = "["; - for (size_t i = 0; i < image_paths.size(); ++i) { - content += image_content + ", "; - } - const std::string text_content = R"({ "type": "text", "text": ")"; - content += text_content + text + R"(" }])"; - messages = R"([{"role": "user", "content": )" + content + R"(}])"; - } - - std::string prompt = std::string(tokenizer->ApplyChatTemplate("", messages.c_str(), "", true)); - - std::cout << "Processing images and prompt..." << std::endl; - auto input_tensors = processor->ProcessImages(prompt.c_str(), images.get()); - - std::cout << "Generating response..." 
<< std::endl; - auto params = OgaGeneratorParams::Create(*model); - params->SetSearchOption("max_length", 7680); - - auto generator = OgaGenerator::Create(*model, *params); - generator->SetInputs(*input_tensors); - - while (!generator->IsDone()) { - generator->GenerateNextToken(); - const auto new_token = generator->GetNextTokens()[0]; - std::cout << stream->Decode(new_token) << std::flush; - } - - for (int i = 0; i < 3; ++i) - std::cout << std::endl; - } -} - -int main(int argc, char** argv) { - std::string model_path, ep, ep_library_path; - if (!parse_args(argc, argv, model_path, ep, &ep_library_path)) { - return -1; - } - - std::cout << "-----------------------------" << std::endl; - std::cout << "Hello, ORT GenAI Model-Vision" << std::endl; - std::cout << "-----------------------------" << std::endl; - - std::cout << "C++ API" << std::endl; - CXX_API(model_path.c_str(), ep.c_str(), ep_library_path.c_str()); - - return 0; -} \ No newline at end of file diff --git a/examples/c/src/phi4-mm.cpp b/examples/c/src/phi4-mm.cpp deleted file mode 100644 index 8c172fe97e..0000000000 --- a/examples/c/src/phi4-mm.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include -#include -#include -#include -#include "common.h" -#include "ort_genai.h" - -// C++ API Example - -void CXX_API(const char* model_path, const char* execution_provider, const char* ep_library_path) { - // Register execution provider library if specified (for plug-in providers) - std::string provider(execution_provider); - std::string library_path(ep_library_path); - register_provider_library(provider, library_path); - - std::cout << "Creating config..." << std::endl; - auto config = OgaConfig::Create(model_path); - - append_provider(*config, provider); - - std::cout << "Creating model..." << std::endl; - auto model = OgaModel::Create(*config); - - std::cout << "Creating multimodal processor..." 
<< std::endl; - auto processor = OgaMultiModalProcessor::Create(*model); - - auto stream = OgaTokenizerStream::Create(*processor); - auto tokenizer = OgaTokenizer::Create(*model); - - while (true) { - // Get images - std::string image_paths_str; - std::cout << "Image Path (comma separated; leave empty if no image):" << std::endl; - std::getline(std::cin, image_paths_str); - std::unique_ptr images; - std::vector image_paths; - for (size_t start = 0, end = 0; end < image_paths_str.size(); start = end + 1) { - end = image_paths_str.find(',', start); - image_paths.push_back(Trim(image_paths_str.substr(start, end - start))); - } - if (image_paths.empty()) { - std::cout << "No image provided" << std::endl; - } else { - std::cout << "Loading images..." << std::endl; - for (const auto& image_path : image_paths) { - if (!FileExists(image_path.c_str())) { - throw std::runtime_error(std::string("Image file not found: ") + image_path); - } - } - std::vector image_paths_c; - for (const auto& image_path : image_paths) image_paths_c.push_back(image_path.c_str()); - images = OgaImages::Load(image_paths_c); - } - - // Get audios - std::string audio_paths_str; - std::cout << "Audio Path (comma separated; leave empty if no audio):" << std::endl; - std::getline(std::cin, audio_paths_str); - std::unique_ptr audios; - std::vector audio_paths; - for (size_t start = 0, end = 0; end < audio_paths_str.size(); start = end + 1) { - end = audio_paths_str.find(',', start); - audio_paths.push_back(Trim(audio_paths_str.substr(start, end - start))); - } - if (audio_paths.empty()) { - std::cout << "No audio provided" << std::endl; - } else { - std::cout << "Loading audios..." 
<< std::endl; - for (const auto& audio_path : audio_paths) { - if (!FileExists(audio_path.c_str())) { - throw std::runtime_error(std::string("Audio file not found: ") + audio_path); - } - } - std::vector audio_paths_c; - for (const auto& audio_path : audio_paths) audio_paths_c.push_back(audio_path.c_str()); - audios = OgaAudios::Load(audio_paths_c); - } - - std::string text; - std::cout << "Prompt: " << std::endl; - std::getline(std::cin, text); - - // Construct messages string with special tokens for ApplyChatTemplate - std::string content; - for (size_t i = 0; i < image_paths.size(); ++i) - content += "<|image_" + std::to_string(i + 1) + "|>\\n"; - for (size_t i = 0; i < audio_paths.size(); ++i) - content += "<|audio_" + std::to_string(i + 1) + "|>\\n"; - content += text; - - const std::string messages = R"([{"role": "user", "content": ")" + content + R"("}])"; - - std::string prompt = std::string(tokenizer->ApplyChatTemplate("", messages.c_str(), "", true)); - - std::cout << "Processing images, audios, and prompt..." << std::endl; - auto input_tensors = processor->ProcessImagesAndAudios(prompt.c_str(), images.get(), audios.get()); - - std::cout << "Generating response..." << std::endl; - auto params = OgaGeneratorParams::Create(*model); - params->SetSearchOption("max_length", 7680); - - auto generator = OgaGenerator::Create(*model, *params); - generator->SetInputs(*input_tensors); - - while (!generator->IsDone()) { - generator->GenerateNextToken(); - const auto new_token = generator->GetNextTokens()[0]; - std::cout << stream->Decode(new_token) << std::flush; - } - - for (int i = 0; i < 3; ++i) - std::cout << std::endl; - } -} - -int main(int argc, char** argv) { - std::string model_path, ep, ep_library_path; - if (!parse_args(argc, argv, model_path, ep, &ep_library_path)) { - return -1; - } - - std::cout << "--------------------" << std::endl; - std::cout << "Hello, Phi-4-Multimodal!" 
<< std::endl; - std::cout << "--------------------" << std::endl; - CXX_API(model_path.c_str(), ep.c_str(), ep_library_path.c_str()); - - return 0; -} \ No newline at end of file diff --git a/examples/c/src/whisper.cpp b/examples/c/src/whisper.cpp index 32e8c9e029..11b9c37ee2 100644 --- a/examples/c/src/whisper.cpp +++ b/examples/c/src/whisper.cpp @@ -31,7 +31,8 @@ void CXX_API(const char* model_path, int32_t num_beams) { } else { std::cout << "Loading audios..." << std::endl; for (const auto& audio_path : audio_paths) { - if (!FileExists(audio_path.c_str())) { + std::filesystem::path p(audio_path); + if (!std::filesystem::exists(p)) { throw std::runtime_error(std::string("Audio file not found: ") + audio_path); } } @@ -69,8 +70,7 @@ void CXX_API(const char* model_path, int32_t num_beams) { std::cout << processor->Decode(tokens, num_tokens) << std::endl; } - for (int i = 0; i < 3; ++i) - std::cout << std::endl; + std::cout << "\n\n\n"; } } @@ -111,7 +111,8 @@ void C_API(const char* model_path, int32_t num_beams) { } else { std::cout << "Loading audios..." << std::endl; for (const auto& audio_path : audio_paths) { - if (!FileExists(audio_path.c_str())) { + std::filesystem::path p(audio_path); + if (!std::filesystem::exists(p)) { throw std::runtime_error(std::string("Audio file not found: ") + audio_path); } std::vector audio_paths_c; @@ -161,8 +162,8 @@ void C_API(const char* model_path, int32_t num_beams) { std::cout << str << std::endl; } - for (int i = 0; i < 3; ++i) - std::cout << std::endl; + std::cout << "\n\n" + << std::endl; OgaDestroyGenerator(generator); OgaDestroyGeneratorParams(params); diff --git a/examples/chat_app/README.md b/examples/chat_app/README.md deleted file mode 100755 index 3755325c51..0000000000 --- a/examples/chat_app/README.md +++ /dev/null @@ -1,92 +0,0 @@ -# LLM Chat UI - -This is a chat demo using the various versions of the LLMs - -> The app supports all of the CPU, CUDA and DirectML. CUDA is used as an example. 
- -**Contents**: -- [Setup](#setup) -- [Get the model](#get-the-model) -- [Launch the app](#launch-the-app) - -## Setup - -1. Install **onnxruntime-genai-cuda** - > If you want to use DirectML model, you can download `onnxruntime-genai-directml` package. - - ``` - pip install numpy - pip install --pre onnxruntime-genai-cuda - ``` - -2. Get this example - - ```bash - git clone -n --depth=1 --filter=tree:0 https://github.com/microsoft/onnxruntime-genai.git - cd onnxruntime-genai - git sparse-checkout set --no-cone examples/chat_app - git checkout - cd examples/chat_app - ``` - -3. Install the requirements - - ```bash - pip install huggingface-hub mdtex2html - pip install gradio==4.36.0 # Gradio 3.47 breaks the UI and versions between 3.42 and 3.47 haven't been tested - ``` - - -## Get the model - -> If you already downloaded your model, you can skip this part and add `--model_path` when launching the app -> For example. `python chat_app/app.py -m "/mnt/onnx/Phi-3-vision"` - -```bash -cd .. -huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cuda --include cuda-int4-rtn-block-32/* --local-dir . -mkdir -p models/cuda -mv cuda-int4-rtn-block-32 models/cuda-int4/Phi-3-vision -``` - -If you would like the app to discover your models, please create the following folder structure, with the `models` folder at the same level as `chat_app`, one folder containing a set of models, and the actual models below this. - -``` ---chat_app ---models - --directml - --phi-3-vision-directml-int4-awq-block-128 - --meta-llama_Llama-2-7b-chat-hf - --mistralai_Mistral-7B-Instruct-v0.1 - ... - --cuda-int4 - --Phi-3-vision -``` - -If there is the word `vision` in the folder name containing the model files, the app will create a UI that processes images. If not, it will create a UI that processes language only. 
- -## Launch the app - -``` -python app.py -``` - -You can also attach your model that is outside of `models` folder to the app by passing arguments of `--model_path` and `--model_name`. - -```bash -python chat_app/app.py --model_name "Phi-3-vision" --model_path "/mnt/onnx/Phi-3-vision" -``` - -You should see output from console -``` -Running on local URL: http://127.0.0.1:7860 - -To create a public link, set `share=True` in `launch()`. -``` - -Then open the local URL in browser -![alt text](image.png) - -For vision model, you will have the below UI interface. - -![alt text](vision_UI_interface.png) diff --git a/examples/chat_app/__init__.py b/examples/chat_app/__init__.py deleted file mode 100755 index cc2c489b27..0000000000 --- a/examples/chat_app/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -import os -import sys - -sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/examples/chat_app/app.py b/examples/chat_app/app.py deleted file mode 100755 index cff38054e2..0000000000 --- a/examples/chat_app/app.py +++ /dev/null @@ -1,261 +0,0 @@ -import argparse -import gc -import os -from pathlib import Path - -import gradio as gr -from app_modules.overwrites import postprocess -from app_modules.presets import description, small_and_beautiful_theme, title -from app_modules.utils import cancel_outputing, delete_last_conversation, reset_state, reset_textbox, transfer_input -from interface.hddr_llm_onnx_interface import ONNXModel -from interface.multimodal_onnx_interface import MultiModal_ONNXModel - -top_directory = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -optimized_directory = os.path.join(top_directory, "models") -available_models = {} - -interface = None - - -def change_model_listener(new_model_name): - global interface - - # if a model exists - shut it down before trying to create the new one - if interface is not None: - interface.shutdown() - del interface - gc.collect() - - d = available_models[new_model_name] - - 
if "vision" in new_model_name: - print("Configuring for multi-modal model") - interface = MultiModal_ONNXModel( - model_path=d["model_dir"], - execution_provider=d["provider"], - ) - else: - print("Configuring for language-only model") - interface = ONNXModel( - model_path=d["model_dir"], - execution_provider=d["provider"], - ) - - # interface.initialize() - - return [ - new_model_name, - gr.update(visible="vision" in new_model_name), - [], - [], - gr.update(value=""), - "", - ] - - -def change_image_visibility(new_model_name): - if "vision" in new_model_name: - return gr.update(visible=True) - - return gr.update(visible=False) - - -gr.Chatbot.postprocess = postprocess - -with Path(f"{top_directory}/chat_app/assets/custom.css").open() as f: - custom_css = f.read() - - -def interface_predict(*args): - res = interface.predict(*args) - yield from res - - -def interface_retry(*args): - res = interface.retry(*args) - yield from res - - -def get_ep_name(name): - new_name = name.lower().replace("directml", "dml") - if "cpu" in new_name: - return "cpu" - elif "cuda" in new_name: - return "cuda" - elif "dml" in new_name: - return "dml" - elif "nvtensorrtrtx" in new_name: - return "NvTensorRtRtx" - raise ValueError(f"{new_name} is not recognized.") - - -def launch_chat_app(expose_locally: bool = False, model_name: str = "", model_path: str = ""): - if os.path.exists(optimized_directory): - for ep_name in os.listdir(optimized_directory): - sub_optimized_directory = os.path.join(optimized_directory, ep_name) - for model_name in os.listdir(sub_optimized_directory): - available_models[model_name] = { - "model_dir": os.path.join(sub_optimized_directory, model_name), - "provider": get_ep_name(ep_name), - } - - if model_path: - available_models[model_name] = {"model_dir": model_path, "provider": get_ep_name(model_path)} - - with gr.Blocks(css=custom_css, theme=small_and_beautiful_theme) as demo: - history = gr.State([]) - user_question = gr.State("") - with gr.Row(): - 
gr.HTML(title) - status_display = gr.Markdown("Success", elem_id="status_display") - - with gr.Row(): - with gr.Column(scale=4): - with gr.Row(): - chatbot = gr.Chatbot(elem_id="chuanhu_chatbot", height=650) - with gr.Row(): - with gr.Column(scale=12): - user_input = gr.Textbox(show_label=False, placeholder="Enter text") - with gr.Column(min_width=70, scale=1): - submit_button = gr.Button("Send") - with gr.Column(min_width=70, scale=1): - cancel_button = gr.Button("Stop") - with gr.Row(): - empty_button = gr.Button( - "🧹 New Conversation", - ) - retry_button = gr.Button("🔄 Regenerate") - delete_last_button = gr.Button("🗑️ Remove Last Turn") - reset_args = {"fn": reset_textbox, "inputs": [], "outputs": [user_input, status_display]} - with gr.Column(), gr.Column(min_width=50, scale=1), gr.Tab(label="Parameter Setting"): - gr.Markdown("# Model") - model_name = gr.Dropdown( - choices=list(available_models.keys()), - label="Model", - show_label=False, # default="Empty STUB", - value=next(iter(available_models.keys())), - ) - max_length_tokens = gr.Slider( - minimum=0, - maximum=131072, - value=8192, - step=128, - interactive=True, - label="Max Token Length", - ) - max_context_length_tokens = gr.Slider( - minimum=0, - maximum=131072, - value=8192, - step=128, - interactive=True, - label="Max History Token Length", - ) - token_printing_step = gr.Slider( - minimum=1, maximum=50, value=4, step=1, interactive=True, label="Token Printing Step", visible=False - ) - images = gr.File(file_count="multiple", file_types=["image"], label="Upload image(s)", visible=False) - images.change( - reset_state, - outputs=[chatbot, history, status_display], - show_progress=True, - ) - images.change(**reset_args) - - model_name.change( - change_model_listener, - inputs=[model_name], - outputs=[model_name, images, chatbot, history, user_input, status_display], - ) - gr.Markdown(description) - - predict_args = { - "fn": interface_predict, - "inputs": [ - user_question, - chatbot, - history, - 
max_length_tokens, - max_context_length_tokens, - token_printing_step, - images, - ], - "outputs": [chatbot, history, status_display], - "show_progress": True, - } - retry_args = { - "fn": interface_retry, - "inputs": [chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, images], - "outputs": [chatbot, history, status_display], - "show_progress": True, - } - - # Chatbot - transfer_input_args = { - "fn": transfer_input, - "inputs": [user_input], - "outputs": [user_question, user_input, submit_button], - "show_progress": True, - } - - predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args) - - predict_event2 = submit_button.click(**transfer_input_args).then(**predict_args) - - empty_button.click( - reset_state, - outputs=[chatbot, history, status_display], - show_progress=True, - ) - empty_button.click(**reset_args) - - predict_event3 = retry_button.click(**retry_args) - - delete_last_button.click( - delete_last_conversation, - [chatbot, history], - [chatbot, history, status_display], - show_progress=True, - ) - cancel_button.click( - cancel_outputing, - [], - [status_display], - cancels=[predict_event1, predict_event2, predict_event3], - ) - - demo.load(change_model_listener, inputs=[model_name], outputs=[model_name, images], concurrency_limit=1) - - demo.title = "Local Model UI" - - if expose_locally: - demo.launch(server_name="0.0.0.0", server_port=5000) - else: - demo.launch(share=True, server_port=5000) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--expose_locally", action="store_true") - parser.add_argument( - "--model_path", "-m", type=str, required=False, help="The location where your model is located." 
- ) - parser.add_argument("--model_name", "-n", type=str, required=False, help="The name of your model") - args = parser.parse_args() - model_path = args.model_path - - if not os.path.exists(optimized_directory) and not model_path: - raise ValueError("Please download the model into models folder or load the model by passing --model_path") - - if args.model_path: - model_name = os.path.basename(model_path) - # check if genai_config.json in the model foler - if "genai_config.json" not in os.listdir(model_path): - raise ValueError( - f"Your model_path folder do not include 'genai.json' file, please double check your model_path '{model_path}'" - ) - - if args.model_name: - model_name = args.model_name - - launch_chat_app(args.expose_locally, model_name, model_path) diff --git a/examples/chat_app/app_modules/overwrites.py b/examples/chat_app/app_modules/overwrites.py deleted file mode 100755 index 8807b89027..0000000000 --- a/examples/chat_app/app_modules/overwrites.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import annotations - -from .presets import gr -from .utils import convert_asis, convert_mdtext, detect_converted_mark - - -def postprocess(self, y: list[tuple[str | None, str | None]]) -> list[tuple[str | None, str | None]]: - """Each message and response should be a string, which may be in Markdown format. - - Returns: - List of tuples representing the message and response. - Each message and response will be a string of HTML. 
- - """ - if y is None or y == []: - return [] - temp = [] - for x in y: - user, bot = x - if not detect_converted_mark(user): - user = convert_asis(user) - if not detect_converted_mark(bot): - bot = convert_mdtext(bot) - temp.append((user, bot)) - return temp - - -GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse diff --git a/examples/chat_app/app_modules/presets.py b/examples/chat_app/app_modules/presets.py deleted file mode 100755 index 64a5398ea3..0000000000 --- a/examples/chat_app/app_modules/presets.py +++ /dev/null @@ -1,73 +0,0 @@ -import gradio as gr - -title = """

LLM Chat UI, Powered By ONNX

""" -description = """\ -
-This is a chat demo using the various versions of the LLMs -
-""" -CONCURRENT_COUNT = 100 - - -ALREADY_CONVERTED_MARK = "" - -small_and_beautiful_theme = gr.themes.Soft( - primary_hue=gr.themes.Color( - c50="#02C160", - c100="rgba(2, 193, 96, 0.2)", - c200="#02C160", - c300="rgba(2, 193, 96, 0.32)", - c400="rgba(2, 193, 96, 0.32)", - c500="rgba(2, 193, 96, 1.0)", - c600="rgba(2, 193, 96, 1.0)", - c700="rgba(2, 193, 96, 0.32)", - c800="rgba(2, 193, 96, 0.32)", - c900="#02C160", - c950="#02C160", - ), - secondary_hue=gr.themes.Color( - c50="#576b95", - c100="#576b95", - c200="#576b95", - c300="#576b95", - c400="#576b95", - c500="#576b95", - c600="#576b95", - c700="#576b95", - c800="#576b95", - c900="#576b95", - c950="#576b95", - ), - neutral_hue=gr.themes.Color( - name="gray", - c50="#f9fafb", - c100="#f3f4f6", - c200="#e5e7eb", - c300="#d1d5db", - c400="#B2B2B2", - c500="#808080", - c600="#636363", - c700="#515151", - c800="#393939", - c900="#272727", - c950="#171717", - ), - radius_size=gr.themes.sizes.radius_sm, -).set( - button_primary_background_fill="#06AE56", - button_primary_background_fill_dark="#06AE56", - button_primary_background_fill_hover="#07C863", - button_primary_border_color="#06AE56", - button_primary_border_color_dark="#06AE56", - button_primary_text_color="#FFFFFF", - button_primary_text_color_dark="#FFFFFF", - button_secondary_background_fill="#F2F2F2", - button_secondary_background_fill_dark="#2B2B2B", - button_secondary_text_color="#393939", - button_secondary_text_color_dark="#FFFFFF", - background_fill_primary="#F7F7F7", - background_fill_primary_dark="#1F1F1F", - block_title_text_color="*primary_500", - block_title_background_fill="*primary_100", - input_background_fill="#F6F6F6", -) diff --git a/examples/chat_app/app_modules/utils.py b/examples/chat_app/app_modules/utils.py deleted file mode 100755 index 1ce8ef0060..0000000000 --- a/examples/chat_app/app_modules/utils.py +++ /dev/null @@ -1,222 +0,0 @@ -from __future__ import annotations - -import html -import re - -import gradio as gr -import 
mdtex2html -from markdown import markdown -from pygments import highlight -from pygments.formatters import HtmlFormatter -from pygments.lexers import ClassNotFound, get_lexer_by_name, guess_lexer - -from .presets import ALREADY_CONVERTED_MARK - - -def markdown_to_html_with_syntax_highlight(md_str): - def replacer(match): - lang = match.group(1) or "text" - code = match.group(2) - lang = lang.strip() - # print(1,lang) - if lang == "text": - lexer = guess_lexer(code) - lang = lexer.name - # print(2,lang) - try: - lexer = get_lexer_by_name(lang, stripall=True) - except ValueError: - lexer = get_lexer_by_name("python", stripall=True) - formatter = HtmlFormatter() - # print(3,lexer.name) - highlighted_code = highlight(code, lexer, formatter) - - return f'
{highlighted_code}
' - - code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```" - md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE) - - return markdown(md_str) - - -def normalize_markdown(md_text: str) -> str: - lines = md_text.split("\n") - normalized_lines = [] - inside_list = False - - for i, line in enumerate(lines): - if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()): - if not inside_list and i > 0 and lines[i - 1].strip() != "": - normalized_lines.append("") - inside_list = True - normalized_lines.append(line) - elif inside_list and line.strip() == "": - if i < len(lines) - 1 and not re.match(r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()): - normalized_lines.append(line) - continue - else: - inside_list = False - normalized_lines.append(line) - - return "\n".join(normalized_lines) - - -def convert_mdtext(md_text): - code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL) - inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL) - code_blocks = code_block_pattern.findall(md_text) - non_code_parts = code_block_pattern.split(md_text)[::2] - - result = [] - for non_code, code in zip(non_code_parts, [*code_blocks, ""], strict=False): - if non_code.strip(): - formatted_non_code = normalize_markdown(non_code) - if inline_code_pattern.search(formatted_non_code): - result.append(markdown(formatted_non_code, extensions=["tables"])) - else: - result.append(mdtex2html.convert(formatted_non_code, extensions=["tables"])) - if code.strip(): - formatted_code = f"\n```{code}\n\n```" - formatted_code = markdown_to_html_with_syntax_highlight(formatted_code) - result.append(formatted_code) - result = "".join(result) - result += ALREADY_CONVERTED_MARK - return result - - -def convert_asis(userinput): - return f'

{html.escape(userinput)}

' + ALREADY_CONVERTED_MARK - - -def detect_converted_mark(userinput): - return bool(userinput.endswith(ALREADY_CONVERTED_MARK)) - - -def detect_language(code): - if code.startswith("\n"): - first_line = "" - else: - first_line = code.strip().split("\n", 1)[0] - language = first_line.lower() if first_line else "" - first_line_length = len(first_line) - code_without_language = code[first_line_length:].lstrip() if first_line else code - return language, code_without_language - - -def convert_to_markdown(text): - text = text.replace("$", "$") - - def replace_leading_tabs_and_spaces(line): - new_line = [] - - for char in line: - if char == "\t": - new_line.append(" ") - elif char == " ": - new_line.append(" ") - else: - break - new_line_length = len(new_line) - return "".join(new_line) + line[new_line_length:] - - markdown_text = "" - lines = text.split("\n") - in_code_block = False - - for line in lines: - if in_code_block is False and line.startswith("```"): - in_code_block = True - markdown_text += f"{line}\n" - elif in_code_block is True and line.startswith("```"): - in_code_block = False - markdown_text += f"{line}\n" - elif in_code_block: - markdown_text += f"{line}\n" - else: - stripped_line = replace_leading_tabs_and_spaces(line) - stripped_line = re.sub(r"^(#)", r"\\\1", stripped_line) - markdown_text += f"{stripped_line} \n" - - return markdown_text - - -def add_language_tag(text): - def detect_language(code_block): - try: - lexer = guess_lexer(code_block) - return lexer.name.lower() - except ClassNotFound: - return "" - - code_block_pattern = re.compile(r"(```)(\w*\n[^`]+```)", re.MULTILINE) - - def replacement(match): - code_block = match.group(2) - if match.group(2).startswith("\n"): - language = detect_language(code_block) - if language: - return f"```{language}{code_block}```" - else: - return f"```\n{code_block}```" - else: - return match.group(1) + code_block + "```" - - return code_block_pattern.sub(replacement, text) - - -def 
delete_last_conversation(chatbot, history): - if len(chatbot) > 0: - chatbot.pop() - - if len(history) > 0: - history.pop() - - return ( - chatbot, - history, - "Delete Done", - ) - - -def reset_state(): - return [], [], "Reset Done" - - -def reset_textbox(): - return gr.update(value=""), "" - - -def cancel_outputing(): - return "Stop Done" - - -def transfer_input(inputs): - return ( - inputs, - gr.update(value=""), - gr.Button(visible=True), - ) - - -class State: - interrupted = False - - def interrupt(self): - self.interrupted = True - - def recover(self): - self.interrupted = False - - -shared_state = State() - - -def is_stop_word_or_prefix(s: str, stop_words: list) -> bool: - for stop_word in stop_words: - if s.endswith(stop_word): - return True - for i in range(1, len(stop_word)): - if s.endswith(stop_word[:i]): - return True - - return False diff --git a/examples/chat_app/assets/custom.css b/examples/chat_app/assets/custom.css deleted file mode 100755 index d9c46c0908..0000000000 --- a/examples/chat_app/assets/custom.css +++ /dev/null @@ -1,487 +0,0 @@ -:root { - --chatbot-color-light: #F3F3F3; - --chatbot-color-dark: #121111; -} - -/* status_display */ -#status_display { - display: flex; - min-height: 2.5em; - align-items: flex-end; - justify-content: flex-end; -} - -#status_display p { - font-size: .85em; - font-family: monospace; - color: var(--body-text-color-subdued); -} - - - -/* usage_display */ -#usage_display { - height: 1em; -} - -#usage_display p { - padding: 0 1em; - font-size: .85em; - font-family: monospace; - color: var(--body-text-color-subdued); -} - -/* list */ -ol:not(.options), -ul:not(.options) { - padding-inline-start: 2em !important; -} - -/* Thank @Keldos-Li for fixing it */ -/* Light mode (default) */ -#chuanhu_chatbot { - background-color: var(--chatbot-color-light) !important; - color: #000000 !important; -} - -[data-testid="bot"] { -} - -[data-testid="user"] { - background-color: #02C160 !important; - color: #F3F3F3 !important; - 
font-size: medium; -} - -/* Dark mode */ -.dark #chuanhu_chatbot { - background-color: var(--chatbot-color-dark) !important; - color: #F3F3F3 !important; -} - -.dark [data-testid="bot"] { - background-color: #2C2C2C !important; -} - -.dark [data-testid="user"] { - background-color: #26B561 !important; -} - -#chuanhu_chatbot { - height: 100%; - min-height: 400px; -} - -[class *="message"] { - border-radius: var(--radius-xl) !important; - border: none; - font-size: var(--text-md) !important; - line-height: var(--line-md) !important; - min-width: calc(var(--text-md)*var(--line-md) + 2*var(--spacing-xl)); -} - -[data-testid="bot"] { - max-width: 85%; - border-bottom-left-radius: 0 !important; -} - -[data-testid="user"] { - max-width: 85%; - width: auto !important; - border-bottom-right-radius: 0 !important; -} - -/* Table */ -table { - margin: 1em 0; - border-collapse: collapse; - empty-cells: show; -} - -td, -th { - border: 1.2px solid var(--border-color-primary) !important; - padding: 0.2em; -} - -thead { - background-color: rgba(175, 184, 193, 0.2); -} - -thead th { - padding: .5em .2em; -} - -/* Inline code */ -#chuanhu_chatbot code { - display: inline; - white-space: break-spaces; - border-radius: 6px; - margin: 0 2px 0 2px; - padding: .2em .4em .1em .4em; - background-color: rgba(175, 184, 193, 0.2); -} - -/* Code block */ -#chuanhu_chatbot pre code { - display: block; - overflow: auto; - white-space: pre; - background-color: hsla(0, 0%, 0%, 80%) !important; - border-radius: 10px; - padding: 1.4em 1.2em 0em 1.4em; - margin: 1.2em 2em 1.2em 0.5em; - color: #F3F3F3; - box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2); -} - -/* Hightlight */ -#chuanhu_chatbot .highlight { - background-color: transparent -} - -#chuanhu_chatbot .highlight .hll { - background-color: #49483e -} - -#chuanhu_chatbot .highlight .c { - color: #75715e -} - -/* Comment */ -#chuanhu_chatbot .highlight .err { - color: #960050; - background-color: #1e0010 -} - -/* Error */ -#chuanhu_chatbot 
.highlight .k { - color: #66d9ef -} - -/* Keyword */ -#chuanhu_chatbot .highlight .l { - color: #ae81ff -} - -/* Literal */ -#chuanhu_chatbot .highlight .n { - color: #8828f2 -} - -/* Name */ -#chuanhu_chatbot .highlight .o { - color: #f92672 -} - -/* Operator */ -#chuanhu_chatbot .highlight .p { - color: #482822 -} - -/* Punctuation */ -#chuanhu_chatbot .highlight .ch { - color: #75715e -} - -/* Comment.Hashbang */ -#chuanhu_chatbot .highlight .cm { - color: #75715e -} - -/* Comment.Multiline */ -#chuanhu_chatbot .highlight .cp { - color: #75715e -} - -/* Comment.Preproc */ -#chuanhu_chatbot .highlight .cpf { - color: #75715e -} - -/* Comment.PreprocFile */ -#chuanhu_chatbot .highlight .c1 { - color: #75715e -} - -/* Comment.Single */ -#chuanhu_chatbot .highlight .cs { - color: #75715e -} - -/* Comment.Special */ -#chuanhu_chatbot .highlight .gd { - color: #f92672 -} - -/* Generic.Deleted */ -#chuanhu_chatbot .highlight .ge { - font-style: italic -} - -/* Generic.Emph */ -#chuanhu_chatbot .highlight .gi { - color: #a6e22e -} - -/* Generic.Inserted */ -#chuanhu_chatbot .highlight .gs { - font-weight: bold -} - -/* Generic.Strong */ -#chuanhu_chatbot .highlight .gu { - color: #75715e -} - -/* Generic.Subheading */ -#chuanhu_chatbot .highlight .kc { - color: #66d9ef -} - -/* Keyword.Constant */ -#chuanhu_chatbot .highlight .kd { - color: #66d9ef -} - -/* Keyword.Declaration */ -#chuanhu_chatbot .highlight .kn { - color: #f92672 -} - -/* Keyword.Namespace */ -#chuanhu_chatbot .highlight .kp { - color: #66d9ef -} - -/* Keyword.Pseudo */ -#chuanhu_chatbot .highlight .kr { - color: #66d9ef -} - -/* Keyword.Reserved */ -#chuanhu_chatbot .highlight .kt { - color: #66d9ef -} - -/* Keyword.Type */ -#chuanhu_chatbot .highlight .ld { - color: #162b74 -} - -/* Literal.Date */ -#chuanhu_chatbot .highlight .m { - color: #ae81ff -} - -/* Literal.Number */ -#chuanhu_chatbot .highlight .s { - color: #062b84 -} - -/* Literal.String */ -#chuanhu_chatbot .highlight .na { - color: 
#a6e22e -} - -/* Name.Attribute */ -#chuanhu_chatbot .highlight .nb { - color: #482822 -} - -/* Name.Builtin */ -#chuanhu_chatbot .highlight .nc { - color: #a6e22e -} - -/* Name.Class */ -#chuanhu_chatbot .highlight .no { - color: #66d9ef -} - -/* Name.Constant */ -#chuanhu_chatbot .highlight .nd { - color: #a6e22e -} - -/* Name.Decorator */ -#chuanhu_chatbot .highlight .ni { - color: #482822 -} - -/* Name.Entity */ -#chuanhu_chatbot .highlight .ne { - color: #a6e22e -} - -/* Name.Exception */ -#chuanhu_chatbot .highlight .nf { - color: #a6e22e -} - -/* Name.Function */ -#chuanhu_chatbot .highlight .nl { - color: #1818f2 -} - -/* Name.Label */ -#chuanhu_chatbot .highlight .nn { - color: #482822 -} - -/* Name.Namespace */ -#chuanhu_chatbot .highlight .nx { - color: #a6e22e -} - -/* Name.Other */ -#chuanhu_chatbot .highlight .py { - color: #482822 -} - -/* Name.Property */ -#chuanhu_chatbot .highlight .nt { - color: #f92672 -} - -/* Name.Tag */ -#chuanhu_chatbot .highlight .nv { - color: #482822 -} - -/* Name.Variable */ -#chuanhu_chatbot .highlight .ow { - color: #f92672 -} - -/* Operator.Word */ -#chuanhu_chatbot .highlight .w { - color: #482822 -} - -/* Text.Whitespace */ -#chuanhu_chatbot .highlight .mb { - color: #ae81ff -} - -/* Literal.Number.Bin */ -#chuanhu_chatbot .highlight .mf { - color: #ae81ff -} - -/* Literal.Number.Float */ -#chuanhu_chatbot .highlight .mh { - color: #ae81ff -} - -/* Literal.Number.Hex */ -#chuanhu_chatbot .highlight .mi { - color: #ae81ff -} - -/* Literal.Number.Integer */ -#chuanhu_chatbot .highlight .mo { - color: #ae81ff -} - -/* Literal.Number.Oct */ -#chuanhu_chatbot .highlight .sa { - color: #162b74 -} - -/* Literal.String.Affix */ -#chuanhu_chatbot .highlight .sb { - color: #161b74 -} - -/* Literal.String.Backtick */ -#chuanhu_chatbot .highlight .sc { - color: #162b74 -} - -/* Literal.String.Char */ -#chuanhu_chatbot .highlight .dl { - color: #162b74 -} - -/* Literal.String.Delimiter */ -#chuanhu_chatbot .highlight .sd { - 
color: #162b74 -} - -/* Literal.String.Doc */ -#chuanhu_chatbot .highlight .s2 { - color: #162b74 -} - -/* Literal.String.Double */ -#chuanhu_chatbot .highlight .se { - color: #ae81ff -} - -/* Literal.String.Escape */ -#chuanhu_chatbot .highlight .sh { - color: #162b74 -} - -/* Literal.String.Heredoc */ -#chuanhu_chatbot .highlight .si { - color: #162b74 -} - -/* Literal.String.Interpol */ -#chuanhu_chatbot .highlight .sx { - color: #162b74 -} - -/* Literal.String.Other */ -#chuanhu_chatbot .highlight .sr { - color: #162b74 -} - -/* Literal.String.Regex */ -#chuanhu_chatbot .highlight .s1 { - color: #162b74 -} - -/* Literal.String.Single */ -#chuanhu_chatbot .highlight .ss { - color: #162b74 -} - -/* Literal.String.Symbol */ -#chuanhu_chatbot .highlight .bp { - color: #482822 -} - -/* Name.Builtin.Pseudo */ -#chuanhu_chatbot .highlight .fm { - color: #a6e22e -} - -/* Name.Function.Magic */ -#chuanhu_chatbot .highlight .vc { - color: #482822 -} - -/* Name.Variable.Class */ -#chuanhu_chatbot .highlight .vg { - color: #482822 -} - -/* Name.Variable.Global */ -#chuanhu_chatbot .highlight .vi { - color: #482822 -} - -/* Name.Variable.Instance */ -#chuanhu_chatbot .highlight .vm { - color: #482822 -} - -/* Name.Variable.Magic */ -#chuanhu_chatbot .highlight .il { - color: #ae81ff -} - -/* Literal.Number.Integer.Long */ diff --git a/examples/chat_app/assets/custom.js b/examples/chat_app/assets/custom.js deleted file mode 100755 index 219691448b..0000000000 --- a/examples/chat_app/assets/custom.js +++ /dev/null @@ -1 +0,0 @@ -// custom javascript here diff --git a/examples/chat_app/consts.py b/examples/chat_app/consts.py deleted file mode 100755 index 44db59915a..0000000000 --- a/examples/chat_app/consts.py +++ /dev/null @@ -1,8 +0,0 @@ -import logging - -logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - level=logging.INFO, -) - -default_prompt = "<|user|>\n<|image_1|>\nWhat is shown in this image?<|end|>\n<|assistant|>\n" diff --git 
a/examples/chat_app/image.png b/examples/chat_app/image.png deleted file mode 100755 index dc7fc90bb7..0000000000 Binary files a/examples/chat_app/image.png and /dev/null differ diff --git a/examples/chat_app/interface/hddr_llm_onnx_interface.py b/examples/chat_app/interface/hddr_llm_onnx_interface.py deleted file mode 100755 index 8c7941a0fd..0000000000 --- a/examples/chat_app/interface/hddr_llm_onnx_interface.py +++ /dev/null @@ -1,198 +0,0 @@ -import gc -import logging -import os -import sys - -import onnxruntime_genai as og -from app_modules.utils import convert_to_markdown, is_stop_word_or_prefix, shared_state - -current_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.join(current_dir, "..", "..", "..")) - - -class ONNXModel: - """A wrapper for OnnxRuntime-GenAI to run ONNX LLM model.""" - - def __init__(self, model_path, execution_provider): - self.og = og - - logging.info("Loading model...") - self.config = og.Config(model_path) - self.config.clear_providers() - if execution_provider != "cpu": - self.config.append_provider(execution_provider) - self.model = og.Model(self.config) - logging.info("Loaded model...") - - self.tokenizer = og.Tokenizer(self.model) - self.tokenizer_stream = self.tokenizer.create_stream() - self.model_path = model_path - - if "phi" in self.model_path: - self.template_header = "" - self.enable_history_max = 10 if "mini" in self.model_path else 2 - self.history_template = "<|user|>{input}<|end|><|assistant|>{response}<|end|>" - self.chat_template = "<|user|>{input}<|end|><|assistant|>" - elif "Llama-3" in self.model_path: - self.enable_history_max = 2 - self.template_header = """<|start_header_id|>system<|end_header_id|> -You are a helpful AI assistant.<|eot_id|>""" - self.history_template = """<|start_header_id|>user<|end_header_id|> -{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|> -{response}<|eot_id|>""" - - self.chat_template = """<|start_header_id|>user<|end_header_id|> 
-{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" - - # self.chat_template = llama3_template - else: - self.enable_history_max = 2 - self.template_header = "" - self.history_template = "[INST] {input} [/INST]{response}" - self.chat_template = "[INST] {input} [/INST]" - - def generate_prompt_with_history(self, text, history, max_length=2048): - prompt = "" - - for dialog in history[-self.enable_history_max :]: - prompt += f"{self.history_template.format(input=dialog[0], response=dialog[1])}" - - prompt = self.template_header + prompt - - prompt += f"{self.chat_template.format(input=text)}" - - input_ids = self.tokenizer.encode(prompt) - - if len(input_ids) <= max_length: - return input_ids - else: - history.clear() - if "Llama-3" in self.model_path: - prompt = self.template_header - prompt += f"{self.chat_template.format(input=text)}" - return self.tokenizer.encode(prompt) - - def search( - self, - input_ids, - max_length: int, - token_printing_step: int = 4, - ): - output_tokens = [] - - params = og.GeneratorParams(self.model) - search_options = {"max_length": max_length} - params.set_search_options(**search_options) - - generator = og.Generator(self.model, params) - generator.append_tokens(input_ids) - - idx = 0 - while not generator.is_done(): - idx += 1 - generator.generate_next_token() - next_token = generator.get_next_tokens()[0] - output_tokens.append(next_token) - - if idx % token_printing_step == 0: - yield self.tokenizer.decode(output_tokens) - - def predict(self, text, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, *args): - if text == "": - yield chatbot, history, "Empty context." - return - - inputs = self.generate_prompt_with_history(text, history, max_length=max_context_length_tokens) - - if inputs is None: - yield chatbot, history, "Input too long." 
- return - - input_ids = inputs[-max_context_length_tokens:] - - human_tokens = [ - "[|Human|]", - "Human:", - "### HUMAN:", - "### User:", - "USER:", - "<|im_start|>user", - "<|user|>", - "### Instruction:", - "GPT4 Correct User:", - ] - - ai_tokens = [ - "[|AI|]", - "AI:", - "### RESPONSE:", - "### Response:", - "ASSISTANT:", - "<|im_start|>assistant", - "<|assistant|>", - "GPT4 Correct Assistant:", - "### Assistant:", - ] - - for x in self.search( - input_ids, - max_length=max_length_tokens, - token_printing_step=token_printing_step, - ): - sentence = x - - if is_stop_word_or_prefix(sentence, ["[|Human|]", "[|AI|]", "Human:", "AIL"]) is False: - for human_token in human_tokens: - if human_token in sentence: - sentence = sentence[: sentence.index(human_token)].strip() - break - - for ai_token in ai_tokens: - if ai_token in sentence: - sentence = sentence[: sentence.index(ai_token)].strip() - break - sentence = sentence.strip() - a, b = ( - [[y[0], convert_to_markdown(y[1])] for y in history] + [[text, convert_to_markdown(sentence)]], - [ - *history, - [text, sentence], - ], - ) - yield a, b, "Generating..." 
- - if shared_state.interrupted: - shared_state.recover() - try: - yield a, b, "Stop: Success" - return - except Exception as e: - print(type(e).__name__, e) - - del input_ids - gc.collect() - - try: - yield a, b, "Generate: Success" - except Exception as e: - print(type(e).__name__, e) - - return - - def shutdown(self): - pass - - def retry(self, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step): - if len(history) == 0: - yield chatbot, history, "Empty context" - return - chatbot.pop() - inputs = history.pop()[0] - yield from self.predict( - inputs, - chatbot, - history, - max_length_tokens, - max_context_length_tokens, - token_printing_step, - ) diff --git a/examples/chat_app/interface/multimodal_onnx_interface.py b/examples/chat_app/interface/multimodal_onnx_interface.py deleted file mode 100755 index 909915a540..0000000000 --- a/examples/chat_app/interface/multimodal_onnx_interface.py +++ /dev/null @@ -1,130 +0,0 @@ -import gc - -import onnxruntime_genai as og -from app_modules.utils import convert_to_markdown, shared_state -from consts import default_prompt, logging - -logging.getLogger("interface") - - -class MultiModal_ONNXModel: - """A wrapper for ONNXRuntime GenAI to run ONNX Multimodal model""" - - def __init__(self, model_path, execution_provider): - self.og = og - - logging.info("Loading model...") - self.config = og.Config(model_path) - self.config.clear_providers() - if execution_provider != "cpu": - self.config.append_provider(execution_provider) - self.model = og.Model(self.config) - logging.info("Loaded model ...") - - self.processor = self.model.create_multimodal_processor() - self.tokenizer = self.processor.create_stream() - - self.enable_history_max = 2 - self.template_header = "" - self.history_template = "[INST] {input} [/INST]{response}" - self.chat_template = "<|user|>\n{tags}\n{input}<|end|>\n<|assistant|>\n" - - def generate_prompt_with_history(self, images, history, text=default_prompt, max_length=3072): 
- prompt = "" - - for dialog in history[-self.enable_history_max :]: - prompt += f"{self.history_template.format(input=dialog[0], response=dialog[1])}" - - prompt = self.template_header + prompt - - image_tags = "" - for i in range(len(images)): - image_tags += f"<|image_{i + 1}|>\n" - - prompt += f"{self.chat_template.format(input=text, tags=image_tags)}" - if len(prompt) > max_length: - history.clear() - prompt = f"{self.chat_template.format(input=text, tags=image_tags)}" - - self.images = og.Images.open(*images) - - logging.info("Preprocessing images and prompt ...") - inputs = self.processor(prompt, images=self.images) - return inputs - - def search(self, inputs, max_length: int = 3072, token_printing_step: int = 1): - output = "" - params = og.GeneratorParams(self.model) - params.set_inputs(inputs) - - search_options = {"max_length": max_length} - params.set_search_options(**search_options) - generator = og.Generator(self.model, params) - - idx = 0 - while not generator.is_done(): - idx += 1 - generator.generate_next_token() - next_token = generator.get_next_tokens()[0] - output += self.tokenizer.decode(next_token) - - return output - - def predict(self, text, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, *args): - if text == "": - yield chatbot, history, "Empty context" - return - - inputs = self.generate_prompt_with_history( - text=text, history=history, images=args[0], max_length=max_context_length_tokens - ) - - sentence = self.search( - inputs, - max_length=max_length_tokens, - token_printing_step=token_printing_step, - ) - - sentence = sentence.strip() - a, b = ( - [[y[0], convert_to_markdown(y[1])] for y in history] + [[text, convert_to_markdown(sentence)]], - [ - *history, - [text, sentence], - ], - ) - yield a, b, "Generating ... 
" - - if shared_state.interrupted: - shared_state.recover() - try: - yield a, b, "Stop: Success" - return - except Exception as e: - print(type(e).__name__, e) - - del inputs - gc.collect() - - try: - yield a, b, "Generate: Success" - - except Exception as e: - print(type(e).__name__, e) - - return - - def shutdown(self): - pass - - def retry(self, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, *args): - if len(history) == 0: - yield chatbot, history, "Empty context" - return - - chatbot.pop() - inputs = history.pop()[0] - - yield from self.predict( - inputs, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, args[0] - ) diff --git a/examples/chat_app/vision_UI_interface.png b/examples/chat_app/vision_UI_interface.png deleted file mode 100644 index 48fecec3c1..0000000000 Binary files a/examples/chat_app/vision_UI_interface.png and /dev/null differ diff --git a/examples/csharp/Common/Common.cs b/examples/csharp/Common/Common.cs new file mode 100644 index 0000000000..d15476c374 --- /dev/null +++ b/examples/csharp/Common/Common.cs @@ -0,0 +1,1101 @@ +using Microsoft.ML.OnnxRuntime; +using Microsoft.ML.OnnxRuntimeGenAI; +using System.CommandLine; +using System.Reflection; +using System.Reflection.Metadata.Ecma335; +using System.Text; +using System.Text.Encodings.Web; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace CommonUtils +{ + public static class Common + { + /// + /// Set log options inside ORT GenAI + /// + /// Dump inputs to the model in the console + /// Dump outputs to the model in the console + /// + /// None + /// + public static void SetLogger(bool inputs = true, bool outputs = true) + { + Utils.SetLogBool("enabled", true); + Utils.SetLogBool("model_input_values", inputs); + Utils.SetLogBool("model_output_values", outputs); + } + + /// + /// Register execution provider if path is provided + /// + /// Name of execution provider to set + /// Path to execution 
provider to set + /// + /// None + /// + public static void RegisterEP(string ep, string ep_path) + { + if (string.IsNullOrEmpty(ep_path)) + { + return; // No library path specified, skip registration + } + + Console.WriteLine($"Registering execution provider: {ep_path}"); + + var ortEnv = OrtEnv.Instance(); + if (string.Equals(ep, "cuda", StringComparison.OrdinalIgnoreCase)) + { + ortEnv.RegisterExecutionProviderLibrary("CUDAExecutionProvider", ep_path); + } + else if (string.Equals(ep, "NvTensorRtRtx", StringComparison.OrdinalIgnoreCase)) + { + ortEnv.RegisterExecutionProviderLibrary("NvTensorRTRTXExecutionProvider", ep_path); + } + else + { + Console.WriteLine($"Warning: EP registration not supported for {ep}"); + Console.WriteLine("Only 'cuda' and 'NvTensorRtRtx' support plug-in libraries."); + return; + } + + Console.WriteLine($"Registered {ep} successfully!"); + } + + /// + /// Get Config object and set EP-specific and search-specific options inside it + /// + /// Path to model folder containing GenAI config + /// Name of execution provider to set + /// Map of EP-specific option names and their values + /// Class of search-specific option names and their values + /// + /// ORT GenAI config object with all options set + /// + public static Config GetConfig(string path, string ep, Dictionary? 
ep_options, GeneratorParamsArgs search_options) + { + var config = new Config(path); + if (ep != "follow_config") + { + config.ClearProviders(); + if (ep != "cpu") + { + Console.WriteLine($"Setting model to {ep}"); + config.AppendProvider(ep); + } + + // Set any EP-specific options + if (ep_options != null) + { + foreach (var kvp in ep_options) + { + var k = kvp.Key; + var v = kvp.Value; + if (k == "enable_cuda_graph" && (ep == "cuda" || ep == "NvTensorRtRtx") && search_options.num_beams > 1) + { + // Disable CUDA graph if using beam search (num_beams > 1), + // num_beams > 1 requires past_present_share_buffer to be false so enable_cuda_graph must be false + config.SetProviderOption(ep, "enable_cuda_graph", "0"); + } + else + { + config.SetProviderOption(ep, k, v); + } + } + } + } + + /** + * TODO: Uncomment the below snippet to use config.Overlay once the C# binding to Config.Overlay + * is in a stable package release. + */ + + // // Create serializer context to skip null attributes + // var options = new JsonSerializerOptions() + // { + // WriteIndented = true, + // PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + // DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + // }; + // var ctx = new ArgsSerializerContext(options); + // var json = JsonSerializer.Serialize(search_options, ctx.GeneratorParamsArgs); + + // // Set any search-specific options that need to be known before constructing a Model object + // // Otherwise they can be set with params.SetSearchOptions(search_options) + // config.Overlay(json); + return config; + } + + /// + /// Set search options for a generator's params during decoding + /// + /// Generator params object to set on + /// Arguments provided by user + /// Use verbose logging + /// + /// None + /// + public static void SetSearchOptions(GeneratorParams generatorParams, GeneratorParamsArgs args, bool verbose) + { + var type = args.GetType(); + var options = new List(); + foreach (var prop in 
type.GetProperties(BindingFlags.Instance | BindingFlags.Public))
+            {
+                var name = prop.Name;
+                var value = prop.GetValue(args);
+                if (value == null || name == "chunk_size") continue;
+
+                if (name == "do_sample")
+                {
+                    var val = Convert.ToBoolean(value);
+                    options.Add($"{name}: {val}");
+                    generatorParams.SetSearchOption(name, val);
+                }
+                else
+                {
+                    var val = Convert.ToDouble(value);
+                    options.Add($"{name}: {val}");
+                    generatorParams.SetSearchOption(name, val);
+                }
+            }
+
+            if (verbose) Console.WriteLine("GeneratorParams created: {" + string.Join(", ", options) + "}");
+        }
+
+        ///
+        /// Apply the chat template with various fallback options
+        ///
+        /// Path to folder containing model
+        /// Tokenizer object to use
+        /// String-encoded list of messages
+        /// Add tokens to indicate the start of the AI's response
+        /// String-encoded list of tools
+        ///
+        /// Prompt to encode
+        ///
+        public static string ApplyChatTemplate(string model_path, Tokenizer tokenizer, string messages, bool add_generation_prompt, string tools = "")
+        {
+            var template_str = "";
+            var jinja_path = Path.Combine(model_path, "chat_template.jinja");
+            if (File.Exists(jinja_path))
+            {
+                template_str = File.ReadAllText(jinja_path, Encoding.UTF8);
+            }
+
+            var prompt = tokenizer.ApplyChatTemplate(
+                messages: messages,
+                tools: tools,
+                add_generation_prompt: add_generation_prompt,
+                template_str: template_str
+            );
+            return prompt;
+        }
+
+        ///
+        /// Get prompt for 'user' role in chat template
+        /// In interactive mode the user is re-prompted until a non-empty line is entered.
+        /// In non-interactive mode the provided prompt is returned as-is.
+        ///
+        /// Provided prompt
+        /// Interactive mode (otherwise uses either user-provided prompt or default)
+        ///
+        /// Prompt to use
+        ///
+        /// Throws ArgumentException when not interactive and the provided prompt is null or empty.
+        /// Throws InvalidOperationException when interactive and standard input ends before a prompt is entered.
+        public static string GetUserPrompt(string prompt, bool interactive)
+        {
+            if (!interactive)
+            {
+                // A fixed prompt cannot change between attempts, so retrying would loop forever; fail fast instead
+                if (string.IsNullOrEmpty(prompt))
+                {
+                    throw new ArgumentException("A non-empty prompt is required when not running in interactive mode.", nameof(prompt));
+                }
+                return prompt;
+            }
+
+            while (true)
+            {
+                Console.Write("Prompt (Use quit() to exit): ");
+                var text = Console.ReadLine();
+                if (text == null)
+                {
+                    // ReadLine returns null once standard input is closed; asking again can never succeed
+                    throw new InvalidOperationException("Input stream closed before a prompt was provided.");
+                }
+
+                if (text.Length > 0)
+                {
+                    return text;
+                }
+
+                // Empty line: tell the user and ask again
+                Console.WriteLine("Empty input. Please enter a valid prompt.");
+            }
+        }
+
+        ///
+        /// Get paths to media for user
+        ///
+        /// User-provided media paths
+        /// Interactive mode (otherwise uses either user-provided media paths or default)
+        /// The media type being obtained
+        ///
+        /// All media filepaths to read and encode
+        ///
+        public static List GetUserMediaPaths(List media_paths, bool interactive, string media_type)
+        {
+            // Check media type
+            var media_type_lower = media_type.ToLowerInvariant();
+            if (media_type_lower != "audio" && media_type_lower != "image")
+            {
+                throw new Exception("Media type must be 'image' or 'audio'");
+            }
+            var media_type_capitalized = char.ToUpperInvariant(media_type_lower[0]) + media_type_lower[1..];
+
+            var paths = new List();
+            if (media_paths.Count > 0)
+            {
+                // If user-provided media paths
+                paths = media_paths;
+            }
+            else if (interactive)
+            {
+                // If interactive mode is on
+                Console.Write($"{media_type_capitalized} Path (comma separated; leave empty if no {media_type_lower}): ");
+                var line = Console.ReadLine() ??
string.Empty; + + // Split by comma, trim whitespace and surrounding quotes + paths = line.Split(',', StringSplitOptions.RemoveEmptyEntries) + .Select(p => + { + // Trim quotes + var s = p.Trim(); + if (s.Length >= 2 && ((s[0] == '"' && s[^1] == '"') || (s[0] == '\'' && s[^1] == '\''))) + { + s = s[1..^1]; // strip surrounding quotes + } + return s; + }) + .Where(p => !string.IsNullOrWhiteSpace(p)) + .ToList(); + } + + paths = paths.Where(p => !string.IsNullOrWhiteSpace(p)).Select(p => p.Trim()).ToList(); + foreach (var path in paths) + { + if (!File.Exists(path)) + { + throw new Exception($"{media_type_capitalized} file not found: {path}"); + } + Console.WriteLine($"Using {media_type_lower}: {path}"); + } + + return paths; + } + + /// + /// Get images for user + /// + /// User-provided image paths + /// Interactive mode (otherwise uses either user-provided image paths or default) + /// + /// (all images, number of images) as a tuple + /// + public static (Images?, int) GetUserImages(List image_paths, bool interactive) + { + var media_type = "image"; + List paths = GetUserMediaPaths(image_paths, interactive, media_type); + if (paths.Count == 0) + { + Console.WriteLine($"No {media_type} provided"); + return (null, 0); + } + + var images = Images.Load(paths.ToArray()); + return (images, paths.Count); + } + + /// + /// Get audios for user + /// + /// User-provided audio paths + /// Interactive mode (otherwise uses either user-provided audio paths or default) + /// + /// (all audios, number of audios) as a tuple + /// + public static (Audios?, int) GetUserAudios(List audio_paths, bool interactive) + { + var media_type = "audio"; + List paths = GetUserMediaPaths(audio_paths, interactive, media_type); + if (paths.Count == 0) + { + Console.WriteLine($"No {media_type} provided"); + return (null, 0); + } + + var audios = Audios.Load(paths.ToArray()); + return (audios, paths.Count); + } + + /// + /// Get content for 'user' role in chat template + /// + /// Model type inside 
ORT GenAI + /// Number of images + /// Number of audios + /// User prompt + /// + /// Combined content for 'user' role + /// + public static string GetUserContent(string model_type, int num_images, int num_audios, string prompt) + { + string content; + // Combine all image tags, audio tags, and text into one user content + if (model_type == "phi3v") + { + // Phi-3 vision, Phi-3.5 vision + var image_tags = ""; + for (int i = 0; i < num_images; i++) + { + image_tags += $"<|image_{i + 1}|>\n"; + } + content = image_tags + prompt; + } + else if (model_type == "phi4mm") + { + // Phi-4 multimodal + var image_tags = ""; + for (int i = 0; i < num_images; i++) + { + image_tags += $"<|image_{i + 1}|>\n"; + } + var audio_tags = ""; + for (int i = 0; i < num_audios; i++) + { + audio_tags += $"<|audio_{i + 1}|>\n"; + } + content = image_tags + audio_tags + prompt; + } + else if (model_type == "qwen2_5_vl" || model_type == "fara") + { + // Qwen-2.5 VL, Fara + var image_tags = ""; + for (int i = 0; i < num_images; i++) + { + image_tags += "<|vision_start|><|image_pad|><|vision_end|>"; + } + content = image_tags + prompt; + } + else + { + // Gemma-3 style: structured content + var list = new List>(); + for (int i = 0; i < num_images; i++) + { + list.Add(new Dictionary + { + ["type"] = "image" + }); + } + list.Add(new Dictionary + { + ["type"] = "text", + ["text"] = prompt + }); + content = JsonSerializer.Serialize(list); + } + + return content; + } + + /// + /// Convert a list of tools to a list of tool schemas + /// + /// List of OpenAI-compatible tools + /// + /// List of JSON schema compatible tools + /// + public static IList ToolsToSchemas(IList tools) + { + var tool_schemas = new List { }; + foreach (var tool in tools) + { + var name = new Dictionary() + { + { "const", tool.Function.Name } + }; + var properties = new Dictionary + { + { "name", name } + }; + + var tool_parameters_exist = tool.Function.Parameters.Count != 0; + if (tool_parameters_exist) + { + var parameters = 
new Dictionary + { + { "type", tool.Function.Parameters.GetValueOrDefault("type", "object") }, + { "properties", tool.Function.Parameters.GetValueOrDefault("properties", new Dictionary{}) }, + { "required", tool.Function.Parameters.GetValueOrDefault("required", new List{}) } + }; + properties.Add("parameters", parameters); + } + + var tool_schema = new ToolSchema() + { + Description = tool.Function.Description, + Type = "object", + Properties = properties, + Required = tool_parameters_exist ? ["name", "parameters"] : ["name"], + AdditionalProperties = false + }; + tool_schemas.Add(tool_schema); + } + return tool_schemas; + } + + /// + /// Create a JSON schema from a list of tools + /// + /// List of OpenAI-compatible tools + /// Output can have a tool call + /// + /// JSON schema as a JSON-compatible string + /// + public static string GetJsonSchema(IList tools, bool tool_output) + { + var schemas = ToolsToSchemas(tools); + var x_guidance = new Dictionary + { + { "whitespace_flexible", false }, + { "key_separator", ": "}, + { "item_separator", ", " } + }; + var json_schema = new JsonSchema + { + XGuidance = x_guidance, + Type = "array", + Items = new Dictionary>{ + { "anyOf", schemas } + }, + MinItems = tool_output ? 1 : 0 + }; + + // Create serializer context with encoder to not escape non-ASCII characters (e.g. 
don't convert '&' to \u0026) + // and to skip null attributes + var options = new JsonSerializerOptions() + { + WriteIndented = true, + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping, + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault, + }; + var ctx = new ToolSerializerContext(options); + + return JsonSerializer.Serialize(json_schema, ctx.JsonSchema); + } + + /// + /// Create a LARK grammar from a list of tools + /// + /// List of OpenAI-compatible tools + /// Output can have text + /// Output can have a tool call + /// String representation of tool call starting token + /// String representation of tool call ending token + /// + /// LARK grammar as a string + /// + public static string GetLarkGrammar(IList tools, bool text_output, bool tool_output, string tool_call_start, string tool_call_end) + { + var known_tool_call_ids = !string.IsNullOrEmpty(tool_call_start) && !string.IsNullOrEmpty(tool_call_end); + var call_type = known_tool_call_ids ? "toolcall" : "functioncall"; + + var rows = new List(); + string? 
start_row;
+            if (text_output && !tool_output)
+            {
+                start_row = "start: TEXT";
+            }
+            else if (!text_output && tool_output)
+            {
+                start_row = $"start: {call_type}";
+            }
+            else if (text_output && tool_output)
+            {
+                start_row = $"start: TEXT | {call_type}";
+            }
+            else
+            {
+                throw new Exception("At least one of 'text_output' and 'tool_output' must be true");
+            }
+            rows.Add(start_row);
+
+            if (text_output)
+            {
+                var text_row = "TEXT: /[^{<](.|\\n)*/";
+                rows.Add(text_row);
+            }
+
+            if (tool_output)
+            {
+                var schema = GetJsonSchema(tools: tools, tool_output: tool_output);
+                if (known_tool_call_ids)
+                {
+                    var tool_row = $"toolcall: {tool_call_start} functioncall {tool_call_end}";
+                    rows.Add(tool_row);
+                }
+
+                var func_row = $"functioncall: %json {schema}";
+                rows.Add(func_row);
+            }
+
+            var grammar = string.Join("\n", rows);
+            return grammar;
+        }
+
+        ///
+        /// Convert a JSON-deserialized object of tools to a list of Tool objects
+        /// Entries without a "function" key are skipped. A missing or null "parameters" entry
+        /// produces a tool with an empty parameter set rather than dropping the tool.
+        ///
+        /// JSON-deserialized object containing OpenAI-compatible tool definitions
+        ///
+        /// List of Tool objects
+        ///
+        public static IList ToTool(IList> tool_defs)
+        {
+            var tools = new List { };
+            foreach (var tool_def in tool_defs)
+            {
+                if (tool_def.TryGetValue("function", out var functionObj))
+                {
+                    // Round-trip through JSON to turn the loosely-typed value into a string-keyed dictionary
+                    var functionStr = JsonSerializer.Serialize(functionObj);
+                    var functionDict = JsonSerializer.Deserialize(functionStr, ToolSerializerContext.Default.DictionaryStringObject);
+                    if (functionDict == null) continue;
+
+                    var name = functionDict.TryGetValue("name", out var nameObj) ? nameObj?.ToString() ?? string.Empty : string.Empty;
+                    var description = functionDict.TryGetValue("description", out var descObj) ? descObj?.ToString() ?? string.Empty : string.Empty;
+
+                    // "parameters" is optional: a function that takes no arguments is still a valid tool.
+                    // ToolsToSchemas already handles an empty parameter set, so keep the tool instead of dropping it.
+                    var paramDict = functionDict.TryGetValue("parameters", out var paramObj) && paramObj != null
+                        ? JsonSerializer.Deserialize(JsonSerializer.Serialize(paramObj), ToolSerializerContext.Default.DictionaryStringObject)
+                        : null;
+
+                    var func = new FunctionDefinition
+                    {
+                        Name = name,
+                        Description = description,
+                        Parameters = paramDict ?? new()
+                    };
+                    var tool = new Tool()
+                    {
+                        Type = "function",
+                        Function = func
+                    };
+                    tools.Add(tool);
+                }
+            }
+            return tools;
+        }
+
+        ///
+        /// Create a grammar to use with LLGuidance
+        ///
+        /// Type of format requested
+        /// Path to file containing OpenAI-compatible tool definitions
+        /// JSON-serialized string containing OpenAI-compatible tool definitions
+        /// List of OpenAI-compatible tools defined in memory
+        /// Output can have text
+        /// Output can have a tool call
+        /// String representation of tool call starting token (e.g. )
+        /// String representation of tool call ending token (e.g. )
+        ///
+        /// (grammar type, grammar data, tools) as a tuple of strings
+        ///
+        public static (string, string, string) GetGuidance(
+            string response_format = "",
+            string filepath = "",
+            string tools_str = "",
+            List?
tools = null, + bool text_output = true, + bool tool_output = false, + string tool_call_start = "", + string tool_call_end = "") + { + var guidance_type = ""; + var guidance_data = ""; + IList all_tools = []; + + // Get list of tools from a range of sources (filepath, JSON-serialized string, in-memory) + if (tool_output) + { + if (File.Exists(filepath)) + { + var json_str = File.ReadAllText(filepath); + if (string.IsNullOrWhiteSpace(json_str)) + { + throw new Exception("Error: JSON file is empty."); + } + + var tool_defs = JsonSerializer.Deserialize(json_str, ToolSerializerContext.Default.IListDictionaryStringObject); + if (tool_defs == null) + { + throw new Exception("Error: Tools did not de-serialize correctly"); + } + all_tools = ToTool(tool_defs); + } + else if (!string.IsNullOrEmpty(tools_str)) + { + var tool_defs = JsonSerializer.Deserialize(tools_str, ToolSerializerContext.Default.IListDictionaryStringObject); + if (tool_defs == null) + { + throw new Exception("Error: Tools did not de-serialize correctly"); + } + all_tools = ToTool(tool_defs); + } + else if (tools != null && tools.Count > 0) + { + try + { + all_tools = ToTool(tools.Cast>().ToList()); + } + catch + { + Console.WriteLine("Could not convert tools from List to List>"); + try + { + all_tools = tools.Cast().ToList(); + } + catch + { + Console.WriteLine("Could not convert tools from List to List"); + } + } + } + else + { + throw new Exception("Error: Please provide the list of tools through a file, JSON-serialized string, or a list of tools"); + } + + if (all_tools.Count <= 0) + { + throw new Exception("Error: Could not obtain a list of tools in memory"); + } + } + + // Create guidance based on user-provided response format + if (response_format == "text" || response_format == "lark_grammar") + { + if (response_format == "text") + { + var right_settings = text_output && !tool_output; + if (!right_settings) + { + throw new Exception("Error: A response format of 'text' requires text_output = true and 
tool_output = false"); + } + } + + guidance_type = "lark_grammar"; + guidance_data = GetLarkGrammar( + tools: all_tools, + text_output: text_output, + tool_output: tool_output, + tool_call_start: tool_call_start, + tool_call_end: tool_call_end); + } + else if (response_format == "json_schema" || response_format == "json_object") + { + var right_settings = tool_output && !text_output; + if (!right_settings) + { + throw new Exception("Error: A response format of 'json_schema' or 'json_object' requires text_output = false and tool_output = true"); + } + + guidance_type = "json_schema"; + guidance_data = GetJsonSchema(tools: all_tools, tool_output: tool_output); + } + else + { + throw new Exception("Error: Invalid response format provided"); + } + + return (guidance_type, guidance_data, JsonSerializer.Serialize(all_tools, ToolSerializerContext.Default.IListTool)); + } + + /// + /// Add arguments for the generator params + /// + /// Original parser object with existing arguments + /// + /// None + /// + public static void GetGeneratorParamsArgs(RootCommand parser) + { + var batch_size = new Option( + name: "batch_size", + aliases: ["-b", "--batch_size"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => 1, + Description = "Batch size for input payload" + }; + + var chunk_size = new Option( + name: "chunk_size", + aliases: ["-c", "--chunk_size"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => 0, + Description = "Chunk size for prefill chunking during context processing (default: 0 = disabled, >0 = enabled)" + }; + + var do_sample = new Option( + name: "do_sample", + aliases: ["-s", "--do_sample"] + ) + { + Arity = ArgumentArity.Zero, + DefaultValueFactory = (_) => false, + Description = "Do random sampling. When false, greedy or beam search are used to generate the output. 
Defaults to false" + }; + + var min_length = new Option( + name: "min_length", + aliases: ["-i", "--min_length"] + ) + { + Arity = ArgumentArity.ExactlyOne, + Description = "Min number of tokens to generate including the prompt" + }; + + var max_length = new Option( + name: "max_length", + aliases: ["-l", "--max_length"] + ) + { + Arity = ArgumentArity.ExactlyOne, + Description = "Max number of tokens to generate including the prompt" + }; + + var num_beams = new Option( + name: "num_beams", + aliases: ["-nb", "--num_beams"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => 1, + Description = "Number of beams to create" + }; + + var num_return_sequences = new Option( + name: "num_return_sequences", + aliases: ["-rs", "--num_return_sequences"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => 1, + Description = "Number of return sequences to produce" + }; + + var repetition_penalty = new Option( + name: "repetition_penalty", + aliases: ["-r", "--repetition_penalty"] + ) + { + Arity = ArgumentArity.ExactlyOne, + Description = "Repetition penalty to sample with" + }; + + var temperature = new Option( + name: "temperature", + aliases: ["-t", "--temperature"] + ) + { + Arity = ArgumentArity.ExactlyOne, + Description = "Temperature to sample with" + }; + + var top_k = new Option( + name: "top_k", + aliases: ["-k", "--top_k"] + ) + { + Arity = ArgumentArity.ExactlyOne, + Description = "Top k tokens to sample from" + }; + + var top_p = new Option( + name: "top_p", + aliases: ["-p", "--top_p"] + ) + { + Arity = ArgumentArity.ExactlyOne, + Description = "Top p probability to sample with" + }; + + parser.Add(batch_size); + parser.Add(chunk_size); + parser.Add(do_sample); + parser.Add(min_length); + parser.Add(max_length); + parser.Add(num_beams); + parser.Add(num_return_sequences); + parser.Add(repetition_penalty); + parser.Add(temperature); + parser.Add(top_k); + parser.Add(top_p); + } + + /// + /// Add arguments for guidance 
options + /// + /// Original parser object with existing arguments + /// + /// None + /// + public static void GetGuidanceArgs(RootCommand parser) + { + var response_format = new Option( + name: "response_format", + aliases: ["-rf", "--response_format"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => "", + Description = "Provide response format for the model", + }; + response_format.Validators.Add(result => { + var value = result.GetValue(response_format)!; + if (string.IsNullOrEmpty(value)) return; + + var options = new List { "text", "json_object", "json_schema", "lark_grammar" }; + if (!options.Contains(value)) + { + var options_str = string.Join(", ", options); + result.AddError($"Response format must be from one of the options: {options_str}"); + } + }); + + var tools_file = new Option( + name: "tools_file", + aliases: ["-tf", "--tools_file"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => "", + Description = "Path to file containing list of OpenAI-compatible tool definitions. 
Ex: test/test_models/tool-definitions/weather.json" + }; + tools_file.Validators.Add(result => + { + var value = result.GetValue(tools_file)!; + if (string.IsNullOrEmpty(value)) return; + + if (!value.EndsWith(".json")) + { + result.AddError("Path must be to a .json file"); + } + if (!File.Exists(value)) + { + result.AddError("JSON file does not exist"); + } + }); + + var text_output = new Option( + name: "text_output", + aliases: ["-text", "--text_output"] + ) + { + Arity = ArgumentArity.Zero, + DefaultValueFactory = (_) => false, + Description = "Produce a text response in the output" + }; + + var tool_output = new Option( + name: "tool_output", + aliases: ["-tool", "--tool_output"] + ) + { + Arity = ArgumentArity.Zero, + DefaultValueFactory = (_) => false, + Description = "Produce a tool call in the output" + }; + + var tool_call_start = new Option( + name: "tool_call_start", + aliases: ["-tcs", "--tool_call_start"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => "", + Description = "String representation of tool call start (ex: <|tool_call|>). Needs to be marked as special in tokenizer.json for guidance to work." + }; + + var tool_call_end = new Option( + name: "tool_call_end", + aliases: ["-tce", "--tool_call_end"] + ) + { + Arity = ArgumentArity.ExactlyOne, + DefaultValueFactory = (_) => "", + Description = "String representation of tool call end (ex: <|/tool_call|>). Needs to be marked as special in tokenizer.json for guidance to work." 
+ }; + + parser.Add(response_format); + parser.Add(tools_file); + parser.Add(text_output); + parser.Add(tool_output); + parser.Add(tool_call_start); + parser.Add(tool_call_end); + } + + /// + /// Set arguments for generator params and guidance + /// + /// Parsed result with user-provided arguments + /// + /// (GeneratorParamsArgs, GuidanceArgs) as a tuple of user-provided arguments + /// + public static (GeneratorParamsArgs, GuidanceArgs) SetGroupedArgs(ParseResult parseResult) + { + GeneratorParamsArgs generatorParamsArgs = new GeneratorParamsArgs + { + batch_size = parseResult.GetValue("batch_size"), + chunk_size = parseResult.GetValue("chunk_size"), + do_sample = parseResult.GetValue("do_sample"), + min_length = parseResult.GetValue("min_length"), + max_length = parseResult.GetValue("max_length"), + num_beams = parseResult.GetValue("num_beams"), + num_return_sequences = parseResult.GetValue("num_return_sequences"), + repetition_penalty = parseResult.GetValue("repetition_penalty"), + temperature = parseResult.GetValue("temperature"), + top_k = parseResult.GetValue("top_k"), + top_p = parseResult.GetValue("top_p") + }; + + GuidanceArgs guidanceArgs = new GuidanceArgs + { + response_format = parseResult.GetValue("response_format") ?? "", + tools_file = parseResult.GetValue("tools_file") ?? "", + text_output = parseResult.GetValue("text_output"), + tool_output = parseResult.GetValue("tool_output"), + tool_call_start = parseResult.GetValue("tool_call_start") ?? "", + tool_call_end = parseResult.GetValue("tool_call_end") ?? 
"" + }; + + return (generatorParamsArgs, guidanceArgs); + } + } + + /// + /// A class for defining a tool in a JSON schema compatible way + /// + public class ToolSchema + { + [JsonPropertyName("description")] + public required string Description { get; set; } + [JsonPropertyName("type")] + public required string Type { get; set; } + [JsonPropertyName("properties")] + public required Dictionary Properties { get; set; } + [JsonPropertyName("required")] + public required IList Required { get; set; } + [JsonPropertyName("additionalProperties")] + public required bool AdditionalProperties { get; set; } + } + + /// + /// A class for defining a JSON schema for guidance + /// + public class JsonSchema + { + [JsonPropertyName("x-guidance")] + public required Dictionary XGuidance { get; set; } + [JsonPropertyName("type")] + public required string Type { get; set; } + [JsonPropertyName("items")] + public required Dictionary> Items { get; set; } + [JsonPropertyName("minItems")] + public required int MinItems { get; set; } + } + + /// + /// A class for defining a function in an OpenAI-compatible way + /// + public class FunctionDefinition + { + [JsonPropertyName("name")] + public required string Name { get; set; } + [JsonPropertyName("description")] + public required string Description { get; set; } + [JsonPropertyName("parameters")] + public required Dictionary Parameters { get; set; } + } + + /// + /// A class for defining a tool in an OpenAI-compatible way + /// + public class Tool + { + [JsonPropertyName("type")] + public required string Type { get; set; } + [JsonPropertyName("function")] + public required FunctionDefinition Function { get; set; } + } + + [JsonSourceGenerationOptions(WriteIndented = true, PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)] + [JsonSerializable(typeof(ToolSchema))] + [JsonSerializable(typeof(JsonSchema))] + [JsonSerializable(typeof(FunctionDefinition))] + [JsonSerializable(typeof(Tool))] + [JsonSerializable(typeof(JsonElement))] + 
[JsonSerializable(typeof(Dictionary))] + [JsonSerializable(typeof(Dictionary))] + [JsonSerializable(typeof(IList>))] + [JsonSerializable(typeof(List>))] + [JsonSerializable(typeof(IList))] + [JsonSerializable(typeof(List))] + public sealed partial class ToolSerializerContext : JsonSerializerContext + { + } + + /// + /// A class for holding parsed values for generator params + /// + public class GeneratorParamsArgs + { + // In case the user doesn't provide the batch size, set it to 1 + public int batch_size { get; set; } = 1; + // In case the user doesn't provide the chunk size, set it to 0 + public int chunk_size { get; set; } = 0; + public bool? do_sample { get; set; } + public int? min_length { get; set; } + public int? max_length { get; set; } + // In case the user doesn't provide the number of beams, set it to 1 + public int num_beams { get; set; } = 1; + // In case the user doesn't provide the number of return sequences, set it to 1 + public int num_return_sequences { get; set; } = 1; + public double? repetition_penalty { get; set; } + public double? temperature { get; set; } + public int? top_k { get; set; } + public double? 
top_p { get; set; } + } + + /// + /// A class for holding parsed values for guidance + /// + public class GuidanceArgs + { + public string response_format { get; set; } = ""; + public string tools_file { get; set; } = ""; + public bool text_output { get; set; } = false; + public bool tool_output { get; set; } = false; + public string tool_call_start { get; set; } = ""; + public string tool_call_end { get; set; } = ""; + } + + [JsonSourceGenerationOptions(WriteIndented = true, PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)] + [JsonSerializable(typeof(GeneratorParamsArgs))] + [JsonSerializable(typeof(GuidanceArgs))] + public sealed partial class ArgsSerializerContext : JsonSerializerContext + { + } +} diff --git a/examples/csharp/Genny/.gitignore b/examples/csharp/Genny/.gitignore deleted file mode 100644 index 4961924315..0000000000 --- a/examples/csharp/Genny/.gitignore +++ /dev/null @@ -1,346 +0,0 @@ -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. 
-## -## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore - -# User-specific files -*.suo -*.user -*.userosscache -*.sln.docstates - -# User-specific files (MonoDevelop/Xamarin Studio) -*.userprefs - -# Build results -[Dd]ebug/ -[Dd]ebugPublic/ -[Rr]elease/ -[Rr]eleases/ -x64/ -x86/ -bld/ -[Bb]in/ -[Oo]bj/ -[Ll]og/ - -# Visual Studio 2015/2017 cache/options directory -.vs/ -# Uncomment if you have tasks that create the project's static files in wwwroot -#wwwroot/ - -# Visual Studio 2017 auto generated files -Generated\ Files/ - -# MSTest test Results -[Tt]est[Rr]esult*/ -[Bb]uild[Ll]og.* - -# NUNIT -*.VisualState.xml -TestResult.xml - -# Build Results of an ATL Project -[Dd]ebugPS/ -[Rr]eleasePS/ -dlldata.c - -# Benchmark Results -BenchmarkDotNet.Artifacts/ - -# .NET Core -project.lock.json -project.fragment.lock.json -artifacts/ -**/Properties/launchSettings.json - -# StyleCop -StyleCopReport.xml - -# Files built by Visual Studio -*_i.c -*_p.c -*_i.h -*.ilk -*.obj -*.iobj -*.pch -*.pdb -*.ipdb -*.pgc -*.pgd -*.rsp -*.sbr -*.tlb -*.tli -*.tlh -*.tmp -*.tmp_proj -*.log -*.vspscc -*.vssscc -.builds -*.pidb -*.svclog -*.scc - -# Chutzpah Test files -_Chutzpah* - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opendb -*.opensdf -*.sdf -*.cachefile -*.VC.db -*.VC.VC.opendb - -# Visual Studio profiler -*.psess -*.vsp -*.vspx -*.sap - -# Visual Studio Trace Files -*.e2e - -# TFS 2012 Local Workspace -$tf/ - -# Guidance Automation Toolkit -*.gpState - -# ReSharper is a .NET coding add-in -_ReSharper*/ -*.[Rr]e[Ss]harper -*.DotSettings.user - -# JustCode is a .NET coding add-in -.JustCode - -# TeamCity is a build add-in -_TeamCity* - -# DotCover is a Code Coverage Tool -*.dotCover - -# AxoCover is a Code Coverage Tool -.axoCover/* -!.axoCover/settings.json - -# Visual Studio code coverage results -*.coverage -*.coveragexml - -# NCrunch -_NCrunch_* -.*crunch*.local.xml -nCrunchTemp_* - -# MightyMoose -*.mm.* -AutoTest.Net/ - -# Web 
workbench (sass) -.sass-cache/ - -# Installshield output folder -[Ee]xpress/ - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish/ - -# Publish Web Output -*.[Pp]ublish.xml -*.azurePubxml -# Note: Comment the next line if you want to checkin your web deploy settings, -# but database connection strings (with potential passwords) will be unencrypted -*.pubxml -*.publishproj - -# Microsoft Azure Web App publish settings. Comment the next line if you want to -# checkin your Azure Web App publish settings, but sensitive information contained -# in these scripts will be unencrypted -PublishScripts/ - -# NuGet Packages -*.nupkg -# The packages folder can be ignored because of Package Restore -**/[Pp]ackages/* -# except build/, which is used as an MSBuild target. -!**/[Pp]ackages/build/ -# Uncomment if necessary however generally it will be regenerated when needed -#!**/[Pp]ackages/repositories.config -# NuGet v3's project.json files produces more ignorable files -*.nuget.props -*.nuget.targets - -# Microsoft Azure Build Output -csx/ -*.build.csdef - -# Microsoft Azure Emulator -ecf/ -rcf/ - -# Windows Store app package directories and files -AppPackages/ -BundleArtifacts/ -Package.StoreAssociation.xml -_pkginfo.txt -*.appx - -# Visual Studio cache files -# files ending in .cache can be ignored -*.[Cc]ache -# but keep track of directories ending in .cache -!*.[Cc]ache/ - -# Others -ClientBin/ -~$* -*~ -*.dbmdl -*.dbproj.schemaview -*.jfm -*.pfx -*.publishsettings -orleans.codegen.cs - -# Including strong name files can present a security risk -# (https://github.com/github/gitignore/pull/2483#issue-259490424) -#*.snk - -# Since there are multiple workflows, uncomment next line to ignore bower_components -# 
(https://github.com/github/gitignore/pull/1529#issuecomment-104372622) -#bower_components/ - -# RIA/Silverlight projects -Generated_Code/ - -# Backup & report files from converting an old project file -# to a newer Visual Studio version. Backup files are not needed, -# because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML -UpgradeLog*.htm -ServiceFabricBackup/ -*.rptproj.bak - -# SQL Server files -*.mdf -*.ldf -*.ndf - -# Business Intelligence projects -*.rdl.data -*.bim.layout -*.bim_*.settings -*.rptproj.rsuser - -# Microsoft Fakes -FakesAssemblies/ - -# GhostDoc plugin setting file -*.GhostDoc.xml - -# Node.js Tools for Visual Studio -.ntvs_analysis.dat -node_modules/ - -# Visual Studio 6 build log -*.plg - -# Visual Studio 6 workspace options file -*.opt - -# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) -*.vbw - -# Visual Studio LightSwitch build output -**/*.HTMLClient/GeneratedArtifacts -**/*.DesktopClient/GeneratedArtifacts -**/*.DesktopClient/ModelManifest.xml -**/*.Server/GeneratedArtifacts -**/*.Server/ModelManifest.xml -_Pvt_Extensions - -# Paket dependency manager -.paket/paket.exe -paket-files/ - -# FAKE - F# Make -.fake/ - -# JetBrains Rider -.idea/ -*.sln.iml - -# CodeRush -.cr/ - -# Python Tools for Visual Studio (PTVS) -__pycache__/ -*.pyc - -# Cake - Uncomment if you are using it -# tools/** -# !tools/packages.config - -# Tabs Studio -*.tss - -# Telerik's JustMock configuration file -*.jmconfig - -# BizTalk build output -*.btp.cs -*.btm.cs -*.odx.cs -*.xsd.cs - -# OpenCover UI analysis results -OpenCover/ - -# Azure Stream Analytics local run output -ASALocalRun/ - -# MSBuild Binary and Structured Log -*.binlog - -# NVidia Nsight GPU debugger configuration file -*.nvuser - -# MFractors (Xamarin productivity tool) working folder -.mfractor/ -/docs/build -src/TensorFlowNET.Native/bazel-* -src/TensorFlowNET.Native/c_api.h -/.vscode -test/TensorFlowNET.Examples/mnist - - -# training model 
resources -.resources -/redist -*.xml -*.xsd - -# docs -site/ - -docker-test-output/* diff --git a/examples/csharp/Genny/Assets/Screenshot1.PNG b/examples/csharp/Genny/Assets/Screenshot1.PNG deleted file mode 100644 index 59ef9f19ad..0000000000 Binary files a/examples/csharp/Genny/Assets/Screenshot1.PNG and /dev/null differ diff --git a/examples/csharp/Genny/Assets/Screenshot2.PNG b/examples/csharp/Genny/Assets/Screenshot2.PNG deleted file mode 100644 index d1c6354813..0000000000 Binary files a/examples/csharp/Genny/Assets/Screenshot2.PNG and /dev/null differ diff --git a/examples/csharp/Genny/Genny.sln b/examples/csharp/Genny/Genny.sln deleted file mode 100644 index 860fbaaa02..0000000000 --- a/examples/csharp/Genny/Genny.sln +++ /dev/null @@ -1,37 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.9.34622.214 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Genny", "Genny\Genny.csproj", "{831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug_Cuda|x64 = Debug_Cuda|x64 - Debug_DirectML|x64 = Debug_DirectML|x64 - Debug|x64 = Debug|x64 - Release_Cuda|x64 = Release_Cuda|x64 - Release_DirectML|x64 = Release_DirectML|x64 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug_Cuda|x64.ActiveCfg = Debug_Cuda|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug_Cuda|x64.Build.0 = Debug_Cuda|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug_DirectML|x64.ActiveCfg = Debug_DirectML|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug_DirectML|x64.Build.0 = Debug_DirectML|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug|x64.ActiveCfg = Debug|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Debug|x64.Build.0 = Debug|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release_Cuda|x64.ActiveCfg = 
Release_Cuda|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release_Cuda|x64.Build.0 = Release_Cuda|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release_DirectML|x64.ActiveCfg = Release_DirectML|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release_DirectML|x64.Build.0 = Release_DirectML|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release|x64.ActiveCfg = Release|x64 - {831197BD-63C7-4C0F-AD0E-4F6783CBB5C0}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {A7159277-CA72-45A9-8327-E3BF29214643} - EndGlobalSection -EndGlobal diff --git a/examples/csharp/Genny/Genny/App.xaml b/examples/csharp/Genny/Genny/App.xaml deleted file mode 100644 index ec5ea8fd14..0000000000 --- a/examples/csharp/Genny/Genny/App.xaml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - diff --git a/examples/csharp/Genny/Genny/App.xaml.cs b/examples/csharp/Genny/Genny/App.xaml.cs deleted file mode 100644 index b6e61e540f..0000000000 --- a/examples/csharp/Genny/Genny/App.xaml.cs +++ /dev/null @@ -1,11 +0,0 @@ -using System.Windows; - -namespace Genny -{ - /// - /// Interaction logic for App.xaml - /// - public partial class App : Application - { - } -} diff --git a/examples/csharp/Genny/Genny/AssemblyInfo.cs b/examples/csharp/Genny/Genny/AssemblyInfo.cs deleted file mode 100644 index b0ec827578..0000000000 --- a/examples/csharp/Genny/Genny/AssemblyInfo.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System.Windows; - -[assembly: ThemeInfo( - ResourceDictionaryLocation.None, //where theme specific resource dictionaries are located - //(used if a resource is not found in the page, - // or application resource dictionaries) - ResourceDictionaryLocation.SourceAssembly //where the generic resource dictionary is located - //(used if a resource is not found in the page, - // app, or any theme specific resource dictionaries) -)] diff --git 
a/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml b/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml deleted file mode 100644 index 2983243b59..0000000000 --- a/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml.cs b/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml.cs deleted file mode 100644 index 6386a43ded..0000000000 --- a/examples/csharp/Genny/Genny/Controls/SearchOptionsControl.xaml.cs +++ /dev/null @@ -1,30 +0,0 @@ -using Genny.ViewModel; -using System.Windows; -using System.Windows.Controls; - -namespace Genny.Controls -{ - /// - /// Interaction logic for SearchOptionsControl.xaml - /// - public partial class SearchOptionsControl : UserControl - { - public SearchOptionsControl() - { - InitializeComponent(); - } - - public static readonly DependencyProperty SearchOptionsProperty = - DependencyProperty.Register(nameof(SearchOptions), typeof(SearchOptionsModel), typeof(SearchOptionsControl), new PropertyMetadata(new SearchOptionsModel())); - - - /// - /// Gets or sets the search options. 
- /// - public SearchOptionsModel SearchOptions - { - get { return (SearchOptionsModel)GetValue(SearchOptionsProperty); } - set { SetValue(SearchOptionsProperty, value); } - } - } -} diff --git a/examples/csharp/Genny/Genny/Extensions.cs b/examples/csharp/Genny/Genny/Extensions.cs deleted file mode 100644 index 5074df1e25..0000000000 --- a/examples/csharp/Genny/Genny/Extensions.cs +++ /dev/null @@ -1,50 +0,0 @@ -using Genny.ViewModel; -using Microsoft.ML.OnnxRuntimeGenAI; -using System.Threading; -using System.Threading.Tasks; -using System.Windows; - -namespace Genny -{ - internal static class Extensions - { - - /// - /// Applies the search options to the generator parameters. - /// - /// The generator parameters. - /// The search options. - internal static void ApplySearchOptions(this GeneratorParams generatorParams, SearchOptionsModel searchOptions) - { - generatorParams.SetSearchOption("top_p", searchOptions.TopP); - generatorParams.SetSearchOption("top_k", searchOptions.TopK); - generatorParams.SetSearchOption("temperature", searchOptions.Temperature); - generatorParams.SetSearchOption("repetition_penalty", searchOptions.RepetitionPenalty); - generatorParams.SetSearchOption("past_present_share_buffer", searchOptions.PastPresentShareBuffer); - generatorParams.SetSearchOption("num_return_sequences", searchOptions.NumReturnSequences); - generatorParams.SetSearchOption("no_repeat_ngram_size", searchOptions.NoRepeatNgramSize); - generatorParams.SetSearchOption("min_length", searchOptions.MinLength); - generatorParams.SetSearchOption("max_length", searchOptions.MaxLength); - generatorParams.SetSearchOption("length_penalty", searchOptions.LengthPenalty); - generatorParams.SetSearchOption("early_stopping", searchOptions.EarlyStopping); - generatorParams.SetSearchOption("do_sample", searchOptions.DoSample); - generatorParams.SetSearchOption("diversity_penalty", searchOptions.DiversityPenalty); - } - - internal static Task EncodeAsync(this Tokenizer tokenizer, string 
input, CancellationToken cancellationToken = default) - { - return Application.Current.Dispatcher.Invoke(() => - { - return Task.Run(() => tokenizer.Encode(input), cancellationToken); - }); - } - - internal static Task DecodeAsync(this Tokenizer tokenizer, int[] input, CancellationToken cancellationToken = default) - { - return Application.Current.Dispatcher.Invoke(() => - { - return Task.Run(() => tokenizer.Decode(input), cancellationToken); - }); - } - } -} diff --git a/examples/csharp/Genny/Genny/Genny.csproj b/examples/csharp/Genny/Genny/Genny.csproj deleted file mode 100644 index 10a39d7e2f..0000000000 --- a/examples/csharp/Genny/Genny/Genny.csproj +++ /dev/null @@ -1,26 +0,0 @@ - - - - WinExe - net6.0-windows - disable - disable - true - true - x64 - x64 - Debug;Release;Debug_Cuda;Release_Cuda;Debug_DirectML;Release_DirectML - - - - - - - - - - - - - - diff --git a/examples/csharp/Genny/Genny/Images/robot.png b/examples/csharp/Genny/Genny/Images/robot.png deleted file mode 100644 index 96edd0fb10..0000000000 Binary files a/examples/csharp/Genny/Genny/Images/robot.png and /dev/null differ diff --git a/examples/csharp/Genny/Genny/Images/user.png b/examples/csharp/Genny/Genny/Images/user.png deleted file mode 100644 index dcaf32f594..0000000000 Binary files a/examples/csharp/Genny/Genny/Images/user.png and /dev/null differ diff --git a/examples/csharp/Genny/Genny/MainWindow.xaml b/examples/csharp/Genny/Genny/MainWindow.xaml deleted file mode 100644 index 3d721f96b5..0000000000 --- a/examples/csharp/Genny/Genny/MainWindow.xaml +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - - - - -