audio_paths_c;
@@ -161,8 +162,8 @@ void C_API(const char* model_path, int32_t num_beams) {
std::cout << str << std::endl;
}
- for (int i = 0; i < 3; ++i)
- std::cout << std::endl;
+ std::cout << "\n\n"
+ << std::endl;
OgaDestroyGenerator(generator);
OgaDestroyGeneratorParams(params);
diff --git a/examples/chat_app/README.md b/examples/chat_app/README.md
deleted file mode 100755
index 3755325c51..0000000000
--- a/examples/chat_app/README.md
+++ /dev/null
@@ -1,92 +0,0 @@
-# LLM Chat UI
-
-This is a chat demo using the various versions of the LLMs
-
-> The app supports all of the CPU, CUDA and DirectML. CUDA is used as an example.
-
-**Contents**:
-- [Setup](#setup)
-- [Get the model](#get-the-model)
-- [Launch the app](#launch-the-app)
-
-## Setup
-
-1. Install **onnxruntime-genai-cuda**
- > If you want to use DirectML model, you can download `onnxruntime-genai-directml` package.
-
- ```
- pip install numpy
- pip install --pre onnxruntime-genai-cuda
- ```
-
-2. Get this example
-
- ```bash
- git clone -n --depth=1 --filter=tree:0 https://github.com/microsoft/onnxruntime-genai.git
- cd onnxruntime-genai
- git sparse-checkout set --no-cone examples/chat_app
- git checkout
- cd examples/chat_app
- ```
-
-3. Install the requirements
-
- ```bash
- pip install huggingface-hub mdtex2html
- pip install gradio==4.36.0 # Gradio 3.47 breaks the UI and versions between 3.42 and 3.47 haven't been tested
- ```
-
-
-## Get the model
-
-> If you already downloaded your model, you can skip this part and add `--model_path` when launching the app
-> For example. `python chat_app/app.py -m "/mnt/onnx/Phi-3-vision"`
-
-```bash
-cd ..
-huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cuda --include cuda-int4-rtn-block-32/* --local-dir .
-mkdir -p models/cuda
-mv cuda-int4-rtn-block-32 models/cuda-int4/Phi-3-vision
-```
-
-If you would like the app to discover your models, please create the following folder structure, with the `models` folder at the same level as `chat_app`, one folder containing a set of models, and the actual models below this.
-
-```
---chat_app
---models
- --directml
- --phi-3-vision-directml-int4-awq-block-128
- --meta-llama_Llama-2-7b-chat-hf
- --mistralai_Mistral-7B-Instruct-v0.1
- ...
- --cuda-int4
- --Phi-3-vision
-```
-
-If there is the word `vision` in the folder name containing the model files, the app will create a UI that processes images. If not, it will create a UI that processes language only.
-
-## Launch the app
-
-```
-python app.py
-```
-
-You can also attach your model that is outside of `models` folder to the app by passing arguments of `--model_path` and `--model_name`.
-
-```bash
-python chat_app/app.py --model_name "Phi-3-vision" --model_path "/mnt/onnx/Phi-3-vision"
-```
-
-You should see output from console
-```
-Running on local URL: http://127.0.0.1:7860
-
-To create a public link, set `share=True` in `launch()`.
-```
-
-Then open the local URL in browser
-
-
-For vision model, you will have the below UI interface.
-
-
diff --git a/examples/chat_app/__init__.py b/examples/chat_app/__init__.py
deleted file mode 100755
index cc2c489b27..0000000000
--- a/examples/chat_app/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-import os
-import sys
-
-sys.path.append(os.path.dirname(os.path.realpath(__file__)))
diff --git a/examples/chat_app/app.py b/examples/chat_app/app.py
deleted file mode 100755
index cff38054e2..0000000000
--- a/examples/chat_app/app.py
+++ /dev/null
@@ -1,261 +0,0 @@
-import argparse
-import gc
-import os
-from pathlib import Path
-
-import gradio as gr
-from app_modules.overwrites import postprocess
-from app_modules.presets import description, small_and_beautiful_theme, title
-from app_modules.utils import cancel_outputing, delete_last_conversation, reset_state, reset_textbox, transfer_input
-from interface.hddr_llm_onnx_interface import ONNXModel
-from interface.multimodal_onnx_interface import MultiModal_ONNXModel
-
-top_directory = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
-optimized_directory = os.path.join(top_directory, "models")
-available_models = {}
-
-interface = None
-
-
-def change_model_listener(new_model_name):
- global interface
-
- # if a model exists - shut it down before trying to create the new one
- if interface is not None:
- interface.shutdown()
- del interface
- gc.collect()
-
- d = available_models[new_model_name]
-
- if "vision" in new_model_name:
- print("Configuring for multi-modal model")
- interface = MultiModal_ONNXModel(
- model_path=d["model_dir"],
- execution_provider=d["provider"],
- )
- else:
- print("Configuring for language-only model")
- interface = ONNXModel(
- model_path=d["model_dir"],
- execution_provider=d["provider"],
- )
-
- # interface.initialize()
-
- return [
- new_model_name,
- gr.update(visible="vision" in new_model_name),
- [],
- [],
- gr.update(value=""),
- "",
- ]
-
-
-def change_image_visibility(new_model_name):
- if "vision" in new_model_name:
- return gr.update(visible=True)
-
- return gr.update(visible=False)
-
-
-gr.Chatbot.postprocess = postprocess
-
-with Path(f"{top_directory}/chat_app/assets/custom.css").open() as f:
- custom_css = f.read()
-
-
-def interface_predict(*args):
- res = interface.predict(*args)
- yield from res
-
-
-def interface_retry(*args):
- res = interface.retry(*args)
- yield from res
-
-
-def get_ep_name(name):
- new_name = name.lower().replace("directml", "dml")
- if "cpu" in new_name:
- return "cpu"
- elif "cuda" in new_name:
- return "cuda"
- elif "dml" in new_name:
- return "dml"
- elif "nvtensorrtrtx" in new_name:
- return "NvTensorRtRtx"
- raise ValueError(f"{new_name} is not recognized.")
-
-
-def launch_chat_app(expose_locally: bool = False, model_name: str = "", model_path: str = ""):
- if os.path.exists(optimized_directory):
- for ep_name in os.listdir(optimized_directory):
- sub_optimized_directory = os.path.join(optimized_directory, ep_name)
- for model_name in os.listdir(sub_optimized_directory):
- available_models[model_name] = {
- "model_dir": os.path.join(sub_optimized_directory, model_name),
- "provider": get_ep_name(ep_name),
- }
-
- if model_path:
- available_models[model_name] = {"model_dir": model_path, "provider": get_ep_name(model_path)}
-
- with gr.Blocks(css=custom_css, theme=small_and_beautiful_theme) as demo:
- history = gr.State([])
- user_question = gr.State("")
- with gr.Row():
- gr.HTML(title)
- status_display = gr.Markdown("Success", elem_id="status_display")
-
- with gr.Row():
- with gr.Column(scale=4):
- with gr.Row():
- chatbot = gr.Chatbot(elem_id="chuanhu_chatbot", height=650)
- with gr.Row():
- with gr.Column(scale=12):
- user_input = gr.Textbox(show_label=False, placeholder="Enter text")
- with gr.Column(min_width=70, scale=1):
- submit_button = gr.Button("Send")
- with gr.Column(min_width=70, scale=1):
- cancel_button = gr.Button("Stop")
- with gr.Row():
- empty_button = gr.Button(
- "🧹 New Conversation",
- )
- retry_button = gr.Button("🔄 Regenerate")
- delete_last_button = gr.Button("🗑️ Remove Last Turn")
- reset_args = {"fn": reset_textbox, "inputs": [], "outputs": [user_input, status_display]}
- with gr.Column(), gr.Column(min_width=50, scale=1), gr.Tab(label="Parameter Setting"):
- gr.Markdown("# Model")
- model_name = gr.Dropdown(
- choices=list(available_models.keys()),
- label="Model",
- show_label=False, # default="Empty STUB",
- value=next(iter(available_models.keys())),
- )
- max_length_tokens = gr.Slider(
- minimum=0,
- maximum=131072,
- value=8192,
- step=128,
- interactive=True,
- label="Max Token Length",
- )
- max_context_length_tokens = gr.Slider(
- minimum=0,
- maximum=131072,
- value=8192,
- step=128,
- interactive=True,
- label="Max History Token Length",
- )
- token_printing_step = gr.Slider(
- minimum=1, maximum=50, value=4, step=1, interactive=True, label="Token Printing Step", visible=False
- )
- images = gr.File(file_count="multiple", file_types=["image"], label="Upload image(s)", visible=False)
- images.change(
- reset_state,
- outputs=[chatbot, history, status_display],
- show_progress=True,
- )
- images.change(**reset_args)
-
- model_name.change(
- change_model_listener,
- inputs=[model_name],
- outputs=[model_name, images, chatbot, history, user_input, status_display],
- )
- gr.Markdown(description)
-
- predict_args = {
- "fn": interface_predict,
- "inputs": [
- user_question,
- chatbot,
- history,
- max_length_tokens,
- max_context_length_tokens,
- token_printing_step,
- images,
- ],
- "outputs": [chatbot, history, status_display],
- "show_progress": True,
- }
- retry_args = {
- "fn": interface_retry,
- "inputs": [chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, images],
- "outputs": [chatbot, history, status_display],
- "show_progress": True,
- }
-
- # Chatbot
- transfer_input_args = {
- "fn": transfer_input,
- "inputs": [user_input],
- "outputs": [user_question, user_input, submit_button],
- "show_progress": True,
- }
-
- predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
-
- predict_event2 = submit_button.click(**transfer_input_args).then(**predict_args)
-
- empty_button.click(
- reset_state,
- outputs=[chatbot, history, status_display],
- show_progress=True,
- )
- empty_button.click(**reset_args)
-
- predict_event3 = retry_button.click(**retry_args)
-
- delete_last_button.click(
- delete_last_conversation,
- [chatbot, history],
- [chatbot, history, status_display],
- show_progress=True,
- )
- cancel_button.click(
- cancel_outputing,
- [],
- [status_display],
- cancels=[predict_event1, predict_event2, predict_event3],
- )
-
- demo.load(change_model_listener, inputs=[model_name], outputs=[model_name, images], concurrency_limit=1)
-
- demo.title = "Local Model UI"
-
- if expose_locally:
- demo.launch(server_name="0.0.0.0", server_port=5000)
- else:
- demo.launch(share=True, server_port=5000)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--expose_locally", action="store_true")
- parser.add_argument(
- "--model_path", "-m", type=str, required=False, help="The location where your model is located."
- )
- parser.add_argument("--model_name", "-n", type=str, required=False, help="The name of your model")
- args = parser.parse_args()
- model_path = args.model_path
-
- if not os.path.exists(optimized_directory) and not model_path:
- raise ValueError("Please download the model into models folder or load the model by passing --model_path")
-
- if args.model_path:
- model_name = os.path.basename(model_path)
- # check if genai_config.json in the model foler
- if "genai_config.json" not in os.listdir(model_path):
- raise ValueError(
- f"Your model_path folder do not include 'genai.json' file, please double check your model_path '{model_path}'"
- )
-
- if args.model_name:
- model_name = args.model_name
-
- launch_chat_app(args.expose_locally, model_name, model_path)
diff --git a/examples/chat_app/app_modules/overwrites.py b/examples/chat_app/app_modules/overwrites.py
deleted file mode 100755
index 8807b89027..0000000000
--- a/examples/chat_app/app_modules/overwrites.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from __future__ import annotations
-
-from .presets import gr
-from .utils import convert_asis, convert_mdtext, detect_converted_mark
-
-
-def postprocess(self, y: list[tuple[str | None, str | None]]) -> list[tuple[str | None, str | None]]:
- """Each message and response should be a string, which may be in Markdown format.
-
- Returns:
- List of tuples representing the message and response.
- Each message and response will be a string of HTML.
-
- """
- if y is None or y == []:
- return []
- temp = []
- for x in y:
- user, bot = x
- if not detect_converted_mark(user):
- user = convert_asis(user)
- if not detect_converted_mark(bot):
- bot = convert_mdtext(bot)
- temp.append((user, bot))
- return temp
-
-
-GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
diff --git a/examples/chat_app/app_modules/presets.py b/examples/chat_app/app_modules/presets.py
deleted file mode 100755
index 64a5398ea3..0000000000
--- a/examples/chat_app/app_modules/presets.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import gradio as gr
-
-title = """LLM Chat UI, Powered By ONNX
"""
-description = """\
-
-This is a chat demo using the various versions of the LLMs
-
-"""
-CONCURRENT_COUNT = 100
-
-
-ALREADY_CONVERTED_MARK = ""
-
-small_and_beautiful_theme = gr.themes.Soft(
- primary_hue=gr.themes.Color(
- c50="#02C160",
- c100="rgba(2, 193, 96, 0.2)",
- c200="#02C160",
- c300="rgba(2, 193, 96, 0.32)",
- c400="rgba(2, 193, 96, 0.32)",
- c500="rgba(2, 193, 96, 1.0)",
- c600="rgba(2, 193, 96, 1.0)",
- c700="rgba(2, 193, 96, 0.32)",
- c800="rgba(2, 193, 96, 0.32)",
- c900="#02C160",
- c950="#02C160",
- ),
- secondary_hue=gr.themes.Color(
- c50="#576b95",
- c100="#576b95",
- c200="#576b95",
- c300="#576b95",
- c400="#576b95",
- c500="#576b95",
- c600="#576b95",
- c700="#576b95",
- c800="#576b95",
- c900="#576b95",
- c950="#576b95",
- ),
- neutral_hue=gr.themes.Color(
- name="gray",
- c50="#f9fafb",
- c100="#f3f4f6",
- c200="#e5e7eb",
- c300="#d1d5db",
- c400="#B2B2B2",
- c500="#808080",
- c600="#636363",
- c700="#515151",
- c800="#393939",
- c900="#272727",
- c950="#171717",
- ),
- radius_size=gr.themes.sizes.radius_sm,
-).set(
- button_primary_background_fill="#06AE56",
- button_primary_background_fill_dark="#06AE56",
- button_primary_background_fill_hover="#07C863",
- button_primary_border_color="#06AE56",
- button_primary_border_color_dark="#06AE56",
- button_primary_text_color="#FFFFFF",
- button_primary_text_color_dark="#FFFFFF",
- button_secondary_background_fill="#F2F2F2",
- button_secondary_background_fill_dark="#2B2B2B",
- button_secondary_text_color="#393939",
- button_secondary_text_color_dark="#FFFFFF",
- background_fill_primary="#F7F7F7",
- background_fill_primary_dark="#1F1F1F",
- block_title_text_color="*primary_500",
- block_title_background_fill="*primary_100",
- input_background_fill="#F6F6F6",
-)
diff --git a/examples/chat_app/app_modules/utils.py b/examples/chat_app/app_modules/utils.py
deleted file mode 100755
index 1ce8ef0060..0000000000
--- a/examples/chat_app/app_modules/utils.py
+++ /dev/null
@@ -1,222 +0,0 @@
-from __future__ import annotations
-
-import html
-import re
-
-import gradio as gr
-import mdtex2html
-from markdown import markdown
-from pygments import highlight
-from pygments.formatters import HtmlFormatter
-from pygments.lexers import ClassNotFound, get_lexer_by_name, guess_lexer
-
-from .presets import ALREADY_CONVERTED_MARK
-
-
-def markdown_to_html_with_syntax_highlight(md_str):
- def replacer(match):
- lang = match.group(1) or "text"
- code = match.group(2)
- lang = lang.strip()
- # print(1,lang)
- if lang == "text":
- lexer = guess_lexer(code)
- lang = lexer.name
- # print(2,lang)
- try:
- lexer = get_lexer_by_name(lang, stripall=True)
- except ValueError:
- lexer = get_lexer_by_name("python", stripall=True)
- formatter = HtmlFormatter()
- # print(3,lexer.name)
- highlighted_code = highlight(code, lexer, formatter)
-
- return f'{highlighted_code}
'
-
- code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
- md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)
-
- return markdown(md_str)
-
-
-def normalize_markdown(md_text: str) -> str:
- lines = md_text.split("\n")
- normalized_lines = []
- inside_list = False
-
- for i, line in enumerate(lines):
- if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
- if not inside_list and i > 0 and lines[i - 1].strip() != "":
- normalized_lines.append("")
- inside_list = True
- normalized_lines.append(line)
- elif inside_list and line.strip() == "":
- if i < len(lines) - 1 and not re.match(r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()):
- normalized_lines.append(line)
- continue
- else:
- inside_list = False
- normalized_lines.append(line)
-
- return "\n".join(normalized_lines)
-
-
-def convert_mdtext(md_text):
- code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
- inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
- code_blocks = code_block_pattern.findall(md_text)
- non_code_parts = code_block_pattern.split(md_text)[::2]
-
- result = []
- for non_code, code in zip(non_code_parts, [*code_blocks, ""], strict=False):
- if non_code.strip():
- formatted_non_code = normalize_markdown(non_code)
- if inline_code_pattern.search(formatted_non_code):
- result.append(markdown(formatted_non_code, extensions=["tables"]))
- else:
- result.append(mdtex2html.convert(formatted_non_code, extensions=["tables"]))
- if code.strip():
- formatted_code = f"\n```{code}\n\n```"
- formatted_code = markdown_to_html_with_syntax_highlight(formatted_code)
- result.append(formatted_code)
- result = "".join(result)
- result += ALREADY_CONVERTED_MARK
- return result
-
-
-def convert_asis(userinput):
- return f'{html.escape(userinput)}
' + ALREADY_CONVERTED_MARK
-
-
-def detect_converted_mark(userinput):
- return bool(userinput.endswith(ALREADY_CONVERTED_MARK))
-
-
-def detect_language(code):
- if code.startswith("\n"):
- first_line = ""
- else:
- first_line = code.strip().split("\n", 1)[0]
- language = first_line.lower() if first_line else ""
- first_line_length = len(first_line)
- code_without_language = code[first_line_length:].lstrip() if first_line else code
- return language, code_without_language
-
-
-def convert_to_markdown(text):
- text = text.replace("$", "$")
-
- def replace_leading_tabs_and_spaces(line):
- new_line = []
-
- for char in line:
- if char == "\t":
- new_line.append(" ")
- elif char == " ":
- new_line.append(" ")
- else:
- break
- new_line_length = len(new_line)
- return "".join(new_line) + line[new_line_length:]
-
- markdown_text = ""
- lines = text.split("\n")
- in_code_block = False
-
- for line in lines:
- if in_code_block is False and line.startswith("```"):
- in_code_block = True
- markdown_text += f"{line}\n"
- elif in_code_block is True and line.startswith("```"):
- in_code_block = False
- markdown_text += f"{line}\n"
- elif in_code_block:
- markdown_text += f"{line}\n"
- else:
- stripped_line = replace_leading_tabs_and_spaces(line)
- stripped_line = re.sub(r"^(#)", r"\\\1", stripped_line)
- markdown_text += f"{stripped_line} \n"
-
- return markdown_text
-
-
-def add_language_tag(text):
- def detect_language(code_block):
- try:
- lexer = guess_lexer(code_block)
- return lexer.name.lower()
- except ClassNotFound:
- return ""
-
- code_block_pattern = re.compile(r"(```)(\w*\n[^`]+```)", re.MULTILINE)
-
- def replacement(match):
- code_block = match.group(2)
- if match.group(2).startswith("\n"):
- language = detect_language(code_block)
- if language:
- return f"```{language}{code_block}```"
- else:
- return f"```\n{code_block}```"
- else:
- return match.group(1) + code_block + "```"
-
- return code_block_pattern.sub(replacement, text)
-
-
-def delete_last_conversation(chatbot, history):
- if len(chatbot) > 0:
- chatbot.pop()
-
- if len(history) > 0:
- history.pop()
-
- return (
- chatbot,
- history,
- "Delete Done",
- )
-
-
-def reset_state():
- return [], [], "Reset Done"
-
-
-def reset_textbox():
- return gr.update(value=""), ""
-
-
-def cancel_outputing():
- return "Stop Done"
-
-
-def transfer_input(inputs):
- return (
- inputs,
- gr.update(value=""),
- gr.Button(visible=True),
- )
-
-
-class State:
- interrupted = False
-
- def interrupt(self):
- self.interrupted = True
-
- def recover(self):
- self.interrupted = False
-
-
-shared_state = State()
-
-
-def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
- for stop_word in stop_words:
- if s.endswith(stop_word):
- return True
- for i in range(1, len(stop_word)):
- if s.endswith(stop_word[:i]):
- return True
-
- return False
diff --git a/examples/chat_app/assets/custom.css b/examples/chat_app/assets/custom.css
deleted file mode 100755
index d9c46c0908..0000000000
--- a/examples/chat_app/assets/custom.css
+++ /dev/null
@@ -1,487 +0,0 @@
-:root {
- --chatbot-color-light: #F3F3F3;
- --chatbot-color-dark: #121111;
-}
-
-/* status_display */
-#status_display {
- display: flex;
- min-height: 2.5em;
- align-items: flex-end;
- justify-content: flex-end;
-}
-
-#status_display p {
- font-size: .85em;
- font-family: monospace;
- color: var(--body-text-color-subdued);
-}
-
-
-
-/* usage_display */
-#usage_display {
- height: 1em;
-}
-
-#usage_display p {
- padding: 0 1em;
- font-size: .85em;
- font-family: monospace;
- color: var(--body-text-color-subdued);
-}
-
-/* list */
-ol:not(.options),
-ul:not(.options) {
- padding-inline-start: 2em !important;
-}
-
-/* Thank @Keldos-Li for fixing it */
-/* Light mode (default) */
-#chuanhu_chatbot {
- background-color: var(--chatbot-color-light) !important;
- color: #000000 !important;
-}
-
-[data-testid="bot"] {
-}
-
-[data-testid="user"] {
- background-color: #02C160 !important;
- color: #F3F3F3 !important;
- font-size: medium;
-}
-
-/* Dark mode */
-.dark #chuanhu_chatbot {
- background-color: var(--chatbot-color-dark) !important;
- color: #F3F3F3 !important;
-}
-
-.dark [data-testid="bot"] {
- background-color: #2C2C2C !important;
-}
-
-.dark [data-testid="user"] {
- background-color: #26B561 !important;
-}
-
-#chuanhu_chatbot {
- height: 100%;
- min-height: 400px;
-}
-
-[class *="message"] {
- border-radius: var(--radius-xl) !important;
- border: none;
- font-size: var(--text-md) !important;
- line-height: var(--line-md) !important;
- min-width: calc(var(--text-md)*var(--line-md) + 2*var(--spacing-xl));
-}
-
-[data-testid="bot"] {
- max-width: 85%;
- border-bottom-left-radius: 0 !important;
-}
-
-[data-testid="user"] {
- max-width: 85%;
- width: auto !important;
- border-bottom-right-radius: 0 !important;
-}
-
-/* Table */
-table {
- margin: 1em 0;
- border-collapse: collapse;
- empty-cells: show;
-}
-
-td,
-th {
- border: 1.2px solid var(--border-color-primary) !important;
- padding: 0.2em;
-}
-
-thead {
- background-color: rgba(175, 184, 193, 0.2);
-}
-
-thead th {
- padding: .5em .2em;
-}
-
-/* Inline code */
-#chuanhu_chatbot code {
- display: inline;
- white-space: break-spaces;
- border-radius: 6px;
- margin: 0 2px 0 2px;
- padding: .2em .4em .1em .4em;
- background-color: rgba(175, 184, 193, 0.2);
-}
-
-/* Code block */
-#chuanhu_chatbot pre code {
- display: block;
- overflow: auto;
- white-space: pre;
- background-color: hsla(0, 0%, 0%, 80%) !important;
- border-radius: 10px;
- padding: 1.4em 1.2em 0em 1.4em;
- margin: 1.2em 2em 1.2em 0.5em;
- color: #F3F3F3;
- box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2);
-}
-
-/* Hightlight */
-#chuanhu_chatbot .highlight {
- background-color: transparent
-}
-
-#chuanhu_chatbot .highlight .hll {
- background-color: #49483e
-}
-
-#chuanhu_chatbot .highlight .c {
- color: #75715e
-}
-
-/* Comment */
-#chuanhu_chatbot .highlight .err {
- color: #960050;
- background-color: #1e0010
-}
-
-/* Error */
-#chuanhu_chatbot .highlight .k {
- color: #66d9ef
-}
-
-/* Keyword */
-#chuanhu_chatbot .highlight .l {
- color: #ae81ff
-}
-
-/* Literal */
-#chuanhu_chatbot .highlight .n {
- color: #8828f2
-}
-
-/* Name */
-#chuanhu_chatbot .highlight .o {
- color: #f92672
-}
-
-/* Operator */
-#chuanhu_chatbot .highlight .p {
- color: #482822
-}
-
-/* Punctuation */
-#chuanhu_chatbot .highlight .ch {
- color: #75715e
-}
-
-/* Comment.Hashbang */
-#chuanhu_chatbot .highlight .cm {
- color: #75715e
-}
-
-/* Comment.Multiline */
-#chuanhu_chatbot .highlight .cp {
- color: #75715e
-}
-
-/* Comment.Preproc */
-#chuanhu_chatbot .highlight .cpf {
- color: #75715e
-}
-
-/* Comment.PreprocFile */
-#chuanhu_chatbot .highlight .c1 {
- color: #75715e
-}
-
-/* Comment.Single */
-#chuanhu_chatbot .highlight .cs {
- color: #75715e
-}
-
-/* Comment.Special */
-#chuanhu_chatbot .highlight .gd {
- color: #f92672
-}
-
-/* Generic.Deleted */
-#chuanhu_chatbot .highlight .ge {
- font-style: italic
-}
-
-/* Generic.Emph */
-#chuanhu_chatbot .highlight .gi {
- color: #a6e22e
-}
-
-/* Generic.Inserted */
-#chuanhu_chatbot .highlight .gs {
- font-weight: bold
-}
-
-/* Generic.Strong */
-#chuanhu_chatbot .highlight .gu {
- color: #75715e
-}
-
-/* Generic.Subheading */
-#chuanhu_chatbot .highlight .kc {
- color: #66d9ef
-}
-
-/* Keyword.Constant */
-#chuanhu_chatbot .highlight .kd {
- color: #66d9ef
-}
-
-/* Keyword.Declaration */
-#chuanhu_chatbot .highlight .kn {
- color: #f92672
-}
-
-/* Keyword.Namespace */
-#chuanhu_chatbot .highlight .kp {
- color: #66d9ef
-}
-
-/* Keyword.Pseudo */
-#chuanhu_chatbot .highlight .kr {
- color: #66d9ef
-}
-
-/* Keyword.Reserved */
-#chuanhu_chatbot .highlight .kt {
- color: #66d9ef
-}
-
-/* Keyword.Type */
-#chuanhu_chatbot .highlight .ld {
- color: #162b74
-}
-
-/* Literal.Date */
-#chuanhu_chatbot .highlight .m {
- color: #ae81ff
-}
-
-/* Literal.Number */
-#chuanhu_chatbot .highlight .s {
- color: #062b84
-}
-
-/* Literal.String */
-#chuanhu_chatbot .highlight .na {
- color: #a6e22e
-}
-
-/* Name.Attribute */
-#chuanhu_chatbot .highlight .nb {
- color: #482822
-}
-
-/* Name.Builtin */
-#chuanhu_chatbot .highlight .nc {
- color: #a6e22e
-}
-
-/* Name.Class */
-#chuanhu_chatbot .highlight .no {
- color: #66d9ef
-}
-
-/* Name.Constant */
-#chuanhu_chatbot .highlight .nd {
- color: #a6e22e
-}
-
-/* Name.Decorator */
-#chuanhu_chatbot .highlight .ni {
- color: #482822
-}
-
-/* Name.Entity */
-#chuanhu_chatbot .highlight .ne {
- color: #a6e22e
-}
-
-/* Name.Exception */
-#chuanhu_chatbot .highlight .nf {
- color: #a6e22e
-}
-
-/* Name.Function */
-#chuanhu_chatbot .highlight .nl {
- color: #1818f2
-}
-
-/* Name.Label */
-#chuanhu_chatbot .highlight .nn {
- color: #482822
-}
-
-/* Name.Namespace */
-#chuanhu_chatbot .highlight .nx {
- color: #a6e22e
-}
-
-/* Name.Other */
-#chuanhu_chatbot .highlight .py {
- color: #482822
-}
-
-/* Name.Property */
-#chuanhu_chatbot .highlight .nt {
- color: #f92672
-}
-
-/* Name.Tag */
-#chuanhu_chatbot .highlight .nv {
- color: #482822
-}
-
-/* Name.Variable */
-#chuanhu_chatbot .highlight .ow {
- color: #f92672
-}
-
-/* Operator.Word */
-#chuanhu_chatbot .highlight .w {
- color: #482822
-}
-
-/* Text.Whitespace */
-#chuanhu_chatbot .highlight .mb {
- color: #ae81ff
-}
-
-/* Literal.Number.Bin */
-#chuanhu_chatbot .highlight .mf {
- color: #ae81ff
-}
-
-/* Literal.Number.Float */
-#chuanhu_chatbot .highlight .mh {
- color: #ae81ff
-}
-
-/* Literal.Number.Hex */
-#chuanhu_chatbot .highlight .mi {
- color: #ae81ff
-}
-
-/* Literal.Number.Integer */
-#chuanhu_chatbot .highlight .mo {
- color: #ae81ff
-}
-
-/* Literal.Number.Oct */
-#chuanhu_chatbot .highlight .sa {
- color: #162b74
-}
-
-/* Literal.String.Affix */
-#chuanhu_chatbot .highlight .sb {
- color: #161b74
-}
-
-/* Literal.String.Backtick */
-#chuanhu_chatbot .highlight .sc {
- color: #162b74
-}
-
-/* Literal.String.Char */
-#chuanhu_chatbot .highlight .dl {
- color: #162b74
-}
-
-/* Literal.String.Delimiter */
-#chuanhu_chatbot .highlight .sd {
- color: #162b74
-}
-
-/* Literal.String.Doc */
-#chuanhu_chatbot .highlight .s2 {
- color: #162b74
-}
-
-/* Literal.String.Double */
-#chuanhu_chatbot .highlight .se {
- color: #ae81ff
-}
-
-/* Literal.String.Escape */
-#chuanhu_chatbot .highlight .sh {
- color: #162b74
-}
-
-/* Literal.String.Heredoc */
-#chuanhu_chatbot .highlight .si {
- color: #162b74
-}
-
-/* Literal.String.Interpol */
-#chuanhu_chatbot .highlight .sx {
- color: #162b74
-}
-
-/* Literal.String.Other */
-#chuanhu_chatbot .highlight .sr {
- color: #162b74
-}
-
-/* Literal.String.Regex */
-#chuanhu_chatbot .highlight .s1 {
- color: #162b74
-}
-
-/* Literal.String.Single */
-#chuanhu_chatbot .highlight .ss {
- color: #162b74
-}
-
-/* Literal.String.Symbol */
-#chuanhu_chatbot .highlight .bp {
- color: #482822
-}
-
-/* Name.Builtin.Pseudo */
-#chuanhu_chatbot .highlight .fm {
- color: #a6e22e
-}
-
-/* Name.Function.Magic */
-#chuanhu_chatbot .highlight .vc {
- color: #482822
-}
-
-/* Name.Variable.Class */
-#chuanhu_chatbot .highlight .vg {
- color: #482822
-}
-
-/* Name.Variable.Global */
-#chuanhu_chatbot .highlight .vi {
- color: #482822
-}
-
-/* Name.Variable.Instance */
-#chuanhu_chatbot .highlight .vm {
- color: #482822
-}
-
-/* Name.Variable.Magic */
-#chuanhu_chatbot .highlight .il {
- color: #ae81ff
-}
-
-/* Literal.Number.Integer.Long */
diff --git a/examples/chat_app/assets/custom.js b/examples/chat_app/assets/custom.js
deleted file mode 100755
index 219691448b..0000000000
--- a/examples/chat_app/assets/custom.js
+++ /dev/null
@@ -1 +0,0 @@
-// custom javascript here
diff --git a/examples/chat_app/consts.py b/examples/chat_app/consts.py
deleted file mode 100755
index 44db59915a..0000000000
--- a/examples/chat_app/consts.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import logging
-
-logging.basicConfig(
- format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
- level=logging.INFO,
-)
-
-default_prompt = "<|user|>\n<|image_1|>\nWhat is shown in this image?<|end|>\n<|assistant|>\n"
diff --git a/examples/chat_app/image.png b/examples/chat_app/image.png
deleted file mode 100755
index dc7fc90bb7..0000000000
Binary files a/examples/chat_app/image.png and /dev/null differ
diff --git a/examples/chat_app/interface/hddr_llm_onnx_interface.py b/examples/chat_app/interface/hddr_llm_onnx_interface.py
deleted file mode 100755
index 8c7941a0fd..0000000000
--- a/examples/chat_app/interface/hddr_llm_onnx_interface.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import gc
-import logging
-import os
-import sys
-
-import onnxruntime_genai as og
-from app_modules.utils import convert_to_markdown, is_stop_word_or_prefix, shared_state
-
-current_dir = os.path.dirname(os.path.realpath(__file__))
-sys.path.append(os.path.join(current_dir, "..", "..", ".."))
-
-
-class ONNXModel:
- """A wrapper for OnnxRuntime-GenAI to run ONNX LLM model."""
-
- def __init__(self, model_path, execution_provider):
- self.og = og
-
- logging.info("Loading model...")
- self.config = og.Config(model_path)
- self.config.clear_providers()
- if execution_provider != "cpu":
- self.config.append_provider(execution_provider)
- self.model = og.Model(self.config)
- logging.info("Loaded model...")
-
- self.tokenizer = og.Tokenizer(self.model)
- self.tokenizer_stream = self.tokenizer.create_stream()
- self.model_path = model_path
-
- if "phi" in self.model_path:
- self.template_header = ""
- self.enable_history_max = 10 if "mini" in self.model_path else 2
- self.history_template = "<|user|>{input}<|end|><|assistant|>{response}<|end|>"
- self.chat_template = "<|user|>{input}<|end|><|assistant|>"
- elif "Llama-3" in self.model_path:
- self.enable_history_max = 2
- self.template_header = """<|start_header_id|>system<|end_header_id|>
-You are a helpful AI assistant.<|eot_id|>"""
- self.history_template = """<|start_header_id|>user<|end_header_id|>
-{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-{response}<|eot_id|>"""
-
- self.chat_template = """<|start_header_id|>user<|end_header_id|>
-{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
-
- # self.chat_template = llama3_template
- else:
- self.enable_history_max = 2
- self.template_header = ""
- self.history_template = "[INST] {input} [/INST]{response}"
- self.chat_template = "[INST] {input} [/INST]"
-
- def generate_prompt_with_history(self, text, history, max_length=2048):
- prompt = ""
-
- for dialog in history[-self.enable_history_max :]:
- prompt += f"{self.history_template.format(input=dialog[0], response=dialog[1])}"
-
- prompt = self.template_header + prompt
-
- prompt += f"{self.chat_template.format(input=text)}"
-
- input_ids = self.tokenizer.encode(prompt)
-
- if len(input_ids) <= max_length:
- return input_ids
- else:
- history.clear()
- if "Llama-3" in self.model_path:
- prompt = self.template_header
- prompt += f"{self.chat_template.format(input=text)}"
- return self.tokenizer.encode(prompt)
-
- def search(
- self,
- input_ids,
- max_length: int,
- token_printing_step: int = 4,
- ):
- output_tokens = []
-
- params = og.GeneratorParams(self.model)
- search_options = {"max_length": max_length}
- params.set_search_options(**search_options)
-
- generator = og.Generator(self.model, params)
- generator.append_tokens(input_ids)
-
- idx = 0
- while not generator.is_done():
- idx += 1
- generator.generate_next_token()
- next_token = generator.get_next_tokens()[0]
- output_tokens.append(next_token)
-
- if idx % token_printing_step == 0:
- yield self.tokenizer.decode(output_tokens)
-
- def predict(self, text, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, *args):
- if text == "":
- yield chatbot, history, "Empty context."
- return
-
- inputs = self.generate_prompt_with_history(text, history, max_length=max_context_length_tokens)
-
- if inputs is None:
- yield chatbot, history, "Input too long."
- return
-
- input_ids = inputs[-max_context_length_tokens:]
-
- human_tokens = [
- "[|Human|]",
- "Human:",
- "### HUMAN:",
- "### User:",
- "USER:",
- "<|im_start|>user",
- "<|user|>",
- "### Instruction:",
- "GPT4 Correct User:",
- ]
-
- ai_tokens = [
- "[|AI|]",
- "AI:",
- "### RESPONSE:",
- "### Response:",
- "ASSISTANT:",
- "<|im_start|>assistant",
- "<|assistant|>",
- "GPT4 Correct Assistant:",
- "### Assistant:",
- ]
-
- for x in self.search(
- input_ids,
- max_length=max_length_tokens,
- token_printing_step=token_printing_step,
- ):
- sentence = x
-
- if is_stop_word_or_prefix(sentence, ["[|Human|]", "[|AI|]", "Human:", "AIL"]) is False:
- for human_token in human_tokens:
- if human_token in sentence:
- sentence = sentence[: sentence.index(human_token)].strip()
- break
-
- for ai_token in ai_tokens:
- if ai_token in sentence:
- sentence = sentence[: sentence.index(ai_token)].strip()
- break
- sentence = sentence.strip()
- a, b = (
- [[y[0], convert_to_markdown(y[1])] for y in history] + [[text, convert_to_markdown(sentence)]],
- [
- *history,
- [text, sentence],
- ],
- )
- yield a, b, "Generating..."
-
- if shared_state.interrupted:
- shared_state.recover()
- try:
- yield a, b, "Stop: Success"
- return
- except Exception as e:
- print(type(e).__name__, e)
-
- del input_ids
- gc.collect()
-
- try:
- yield a, b, "Generate: Success"
- except Exception as e:
- print(type(e).__name__, e)
-
- return
-
- def shutdown(self):
- pass
-
- def retry(self, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step):
- if len(history) == 0:
- yield chatbot, history, "Empty context"
- return
- chatbot.pop()
- inputs = history.pop()[0]
- yield from self.predict(
- inputs,
- chatbot,
- history,
- max_length_tokens,
- max_context_length_tokens,
- token_printing_step,
- )
diff --git a/examples/chat_app/interface/multimodal_onnx_interface.py b/examples/chat_app/interface/multimodal_onnx_interface.py
deleted file mode 100755
index 909915a540..0000000000
--- a/examples/chat_app/interface/multimodal_onnx_interface.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import gc
-
-import onnxruntime_genai as og
-from app_modules.utils import convert_to_markdown, shared_state
-from consts import default_prompt, logging
-
-logging.getLogger("interface")
-
-
-class MultiModal_ONNXModel:
- """A wrapper for ONNXRuntime GenAI to run ONNX Multimodal model"""
-
- def __init__(self, model_path, execution_provider):
- self.og = og
-
- logging.info("Loading model...")
- self.config = og.Config(model_path)
- self.config.clear_providers()
- if execution_provider != "cpu":
- self.config.append_provider(execution_provider)
- self.model = og.Model(self.config)
- logging.info("Loaded model ...")
-
- self.processor = self.model.create_multimodal_processor()
- self.tokenizer = self.processor.create_stream()
-
- self.enable_history_max = 2
- self.template_header = ""
- self.history_template = "[INST] {input} [/INST]{response}"
- self.chat_template = "<|user|>\n{tags}\n{input}<|end|>\n<|assistant|>\n"
-
- def generate_prompt_with_history(self, images, history, text=default_prompt, max_length=3072):
- prompt = ""
-
- for dialog in history[-self.enable_history_max :]:
- prompt += f"{self.history_template.format(input=dialog[0], response=dialog[1])}"
-
- prompt = self.template_header + prompt
-
- image_tags = ""
- for i in range(len(images)):
- image_tags += f"<|image_{i + 1}|>\n"
-
- prompt += f"{self.chat_template.format(input=text, tags=image_tags)}"
- if len(prompt) > max_length:
- history.clear()
- prompt = f"{self.chat_template.format(input=text, tags=image_tags)}"
-
- self.images = og.Images.open(*images)
-
- logging.info("Preprocessing images and prompt ...")
- inputs = self.processor(prompt, images=self.images)
- return inputs
-
- def search(self, inputs, max_length: int = 3072, token_printing_step: int = 1):
- output = ""
- params = og.GeneratorParams(self.model)
- params.set_inputs(inputs)
-
- search_options = {"max_length": max_length}
- params.set_search_options(**search_options)
- generator = og.Generator(self.model, params)
-
- idx = 0
- while not generator.is_done():
- idx += 1
- generator.generate_next_token()
- next_token = generator.get_next_tokens()[0]
- output += self.tokenizer.decode(next_token)
-
- return output
-
- def predict(self, text, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, *args):
- if text == "":
- yield chatbot, history, "Empty context"
- return
-
- inputs = self.generate_prompt_with_history(
- text=text, history=history, images=args[0], max_length=max_context_length_tokens
- )
-
- sentence = self.search(
- inputs,
- max_length=max_length_tokens,
- token_printing_step=token_printing_step,
- )
-
- sentence = sentence.strip()
- a, b = (
- [[y[0], convert_to_markdown(y[1])] for y in history] + [[text, convert_to_markdown(sentence)]],
- [
- *history,
- [text, sentence],
- ],
- )
- yield a, b, "Generating ... "
-
- if shared_state.interrupted:
- shared_state.recover()
- try:
- yield a, b, "Stop: Success"
- return
- except Exception as e:
- print(type(e).__name__, e)
-
- del inputs
- gc.collect()
-
- try:
- yield a, b, "Generate: Success"
-
- except Exception as e:
- print(type(e).__name__, e)
-
- return
-
- def shutdown(self):
- pass
-
- def retry(self, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, *args):
- if len(history) == 0:
- yield chatbot, history, "Empty context"
- return
-
- chatbot.pop()
- inputs = history.pop()[0]
-
- yield from self.predict(
- inputs, chatbot, history, max_length_tokens, max_context_length_tokens, token_printing_step, args[0]
- )
diff --git a/examples/chat_app/vision_UI_interface.png b/examples/chat_app/vision_UI_interface.png
deleted file mode 100644
index 48fecec3c1..0000000000
Binary files a/examples/chat_app/vision_UI_interface.png and /dev/null differ
diff --git a/examples/csharp/Common/Common.cs b/examples/csharp/Common/Common.cs
new file mode 100644
index 0000000000..d15476c374
--- /dev/null
+++ b/examples/csharp/Common/Common.cs
@@ -0,0 +1,1101 @@
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntimeGenAI;
+using System.CommandLine;
+using System.Reflection;
+using System.Reflection.Metadata.Ecma335;
+using System.Text;
+using System.Text.Encodings.Web;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace CommonUtils
+{
+ public static class Common
+ {
+ /// <summary>
+ /// Set log options inside ORT GenAI (always sets the "enabled" log flag to true)
+ /// </summary>
+ /// <param name="inputs">Dump inputs to the model in the console</param>
+ /// <param name="outputs">Dump outputs to the model in the console</param>
+ /// <returns>
+ /// None
+ /// </returns>
+ public static void SetLogger(bool inputs = true, bool outputs = true)
+ {
+ Utils.SetLogBool("enabled", true);
+ Utils.SetLogBool("model_input_values", inputs);
+ Utils.SetLogBool("model_output_values", outputs);
+ }
+
+ /// <summary>
+ /// Register execution provider if path is provided
+ /// </summary>
+ /// <param name="ep">Name of execution provider to set (compared case-insensitively; only "cuda" and "NvTensorRtRtx" are handled)</param>
+ /// <param name="ep_path">Path to the execution provider library to register; null or empty skips registration</param>
+ /// <returns>
+ /// None
+ /// </returns>
+ public static void RegisterEP(string ep, string ep_path)
+ {
+ if (string.IsNullOrEmpty(ep_path))
+ {
+ return; // No library path specified, skip registration
+ }
+
+ Console.WriteLine($"Registering execution provider: {ep_path}");
+
+ var ortEnv = OrtEnv.Instance();
+ if (string.Equals(ep, "cuda", StringComparison.OrdinalIgnoreCase))
+ {
+ ortEnv.RegisterExecutionProviderLibrary("CUDAExecutionProvider", ep_path);
+ }
+ else if (string.Equals(ep, "NvTensorRtRtx", StringComparison.OrdinalIgnoreCase))
+ {
+ ortEnv.RegisterExecutionProviderLibrary("NvTensorRTRTXExecutionProvider", ep_path);
+ }
+ else
+ {
+ Console.WriteLine($"Warning: EP registration not supported for {ep}");
+ Console.WriteLine("Only 'cuda' and 'NvTensorRtRtx' support plug-in libraries.");
+ return; // Nothing was registered, so skip the success message below
+ }
+
+ Console.WriteLine($"Registered {ep} successfully!");
+ }
+
+ /// <summary>
+ /// Get Config object and set EP-specific and search-specific options inside it
+ /// </summary>
+ /// <param name="path">Path to model folder containing GenAI config</param>
+ /// <param name="ep">Name of execution provider to set ("follow_config" leaves the config's providers and options untouched; "cpu" clears providers without appending one)</param>
+ /// <param name="ep_options">Map of EP-specific option names and their values (may be null; ignored when ep is "follow_config")</param>
+ /// <param name="search_options">Class of search-specific option names and their values (only num_beams is read here)</param>
+ /// <returns>
+ /// ORT GenAI config object with all options set
+ /// </returns>
+ public static Config GetConfig(string path, string ep, Dictionary<string, string>? ep_options, GeneratorParamsArgs search_options)
+ {
+ var config = new Config(path);
+ if (ep != "follow_config")
+ {
+ config.ClearProviders();
+ if (ep != "cpu")
+ {
+ Console.WriteLine($"Setting model to {ep}");
+ config.AppendProvider(ep);
+ }
+
+ // Set any EP-specific options
+ if (ep_options != null)
+ {
+ foreach (var kvp in ep_options)
+ {
+ var k = kvp.Key;
+ var v = kvp.Value;
+ if (k == "enable_cuda_graph" && (ep == "cuda" || ep == "NvTensorRtRtx") && search_options.num_beams > 1)
+ {
+ // Disable CUDA graph if using beam search (num_beams > 1),
+ // num_beams > 1 requires past_present_share_buffer to be false so enable_cuda_graph must be false
+ config.SetProviderOption(ep, "enable_cuda_graph", "0");
+ }
+ else
+ {
+ config.SetProviderOption(ep, k, v);
+ }
+ }
+ }
+ }
+
+ /**
+ * TODO: Uncomment the below snippet to use config.Overlay once the C# binding to Config.Overlay
+ * is in a stable package release.
+ */
+
+ // // Create serializer context to skip null attributes
+ // var options = new JsonSerializerOptions()
+ // {
+ // WriteIndented = true,
+ // PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+ // DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
+ // };
+ // var ctx = new ArgsSerializerContext(options);
+ // var json = JsonSerializer.Serialize(search_options, ctx.GeneratorParamsArgs);
+
+ // // Set any search-specific options that need to be known before constructing a Model object
+ // // Otherwise they can be set with params.SetSearchOptions(search_options)
+ // config.Overlay(json);
+ return config;
+ }
+
+ /// <summary>
+ /// Set search options for a generator's params during decoding
+ /// </summary>
+ /// <param name="generatorParams">Generator params object to set on</param>
+ /// <param name="args">Arguments provided by user (every non-null public instance property except chunk_size is forwarded)</param>
+ /// <param name="verbose">Use verbose logging</param>
+ /// <returns>
+ /// None
+ /// </returns>
+ public static void SetSearchOptions(GeneratorParams generatorParams, GeneratorParamsArgs args, bool verbose)
+ {
+ var type = args.GetType();
+ var options = new List<string>();
+ foreach (var prop in type.GetProperties(BindingFlags.Instance | BindingFlags.Public))
+ {
+ var name = prop.Name;
+ var value = prop.GetValue(args);
+ if (value == null || name == "chunk_size") continue; // unset options and chunk_size are not search options
+
+ if (name == "do_sample")
+ {
+ var val = Convert.ToBoolean(value); // do_sample is the only option passed as a bool
+ options.Add($"{name}: {val}");
+ generatorParams.SetSearchOption(name, val);
+ }
+ else
+ {
+ var val = Convert.ToDouble(value); // every other option is passed as a double
+ options.Add($"{name}: {val}");
+ generatorParams.SetSearchOption(name, val);
+ }
+ }
+
+ if (verbose) Console.WriteLine("GeneratorParams created: {" + string.Join(", ", options) + "}");
+ }
+
+ /// <summary>
+ /// Apply the chat template, using chat_template.jinja from the model folder when that file exists (otherwise an empty template string is passed)
+ /// </summary>
+ /// <param name="model_path">Path to folder containing model</param>
+ /// <param name="tokenizer">Tokenizer object to use</param>
+ /// <param name="messages">String-encoded list of messages</param>
+ /// <param name="add_generation_prompt">Add tokens to indicate the start of the AI's response</param>
+ /// <param name="tools">String-encoded list of tools</param>
+ /// <returns>
+ /// Prompt to encode
+ /// </returns>
+ public static string ApplyChatTemplate(string model_path, Tokenizer tokenizer, string messages, bool add_generation_prompt, string tools = "")
+ {
+ var template_str = "";
+ var jinja_path = Path.Combine(model_path, "chat_template.jinja");
+ if (File.Exists(jinja_path))
+ {
+ template_str = File.ReadAllText(jinja_path, Encoding.UTF8);
+ }
+
+ var prompt = tokenizer.ApplyChatTemplate(
+ messages: messages,
+ tools: tools,
+ add_generation_prompt: add_generation_prompt,
+ template_str: template_str
+ );
+ return prompt;
+ }
+
+ /// <summary>
+ /// Get prompt for 'user' role in chat template
+ /// </summary>
+ /// <param name="prompt">Provided prompt (returned as-is when not interactive)</param>
+ /// <param name="interactive">Interactive mode: read the prompt from the console, asking again while the input is empty</param>
+ /// <returns>Prompt to use (never null or empty)</returns>
+ /// <exception cref="ArgumentException">Not interactive and the provided prompt is null or empty</exception>
+ /// <exception cref="EndOfStreamException">Interactive and console input ended before a prompt was entered</exception>
+ public static string GetUserPrompt(string prompt, bool interactive)
+ {
+ if (!interactive)
+ {
+ // A fixed prompt never changes, so retrying on empty input would loop forever
+ if (string.IsNullOrEmpty(prompt))
+ {
+ throw new ArgumentException("Prompt must not be empty when not in interactive mode", nameof(prompt));
+ }
+ return prompt;
+ }
+
+ while (true)
+ {
+ Console.Write("Prompt (Use quit() to exit): ");
+ var text = Console.ReadLine();
+ if (text == null)
+ {
+ // ReadLine returns null once input is exhausted; asking again could never succeed
+ throw new EndOfStreamException("Input stream closed before a prompt was entered");
+ }
+ if (text.Length > 0)
+ {
+ return text;
+ }
+ Console.WriteLine("Empty input. Please enter a valid prompt.");
+ }
+ }
+
+ /// <summary>
+ /// Get paths to media for user
+ /// </summary>
+ /// <param name="media_paths">User-provided media paths (used as-is when non-empty)</param>
+ /// <param name="interactive">Interactive mode: when no paths were provided, ask for a comma-separated list on the console</param>
+ /// <param name="media_type">The media type being obtained ("image" or "audio", case-insensitive; anything else throws)</param>
+ /// <returns>
+ /// All media filepaths to read and encode (empty when none were provided; throws if a listed file does not exist)
+ /// </returns>
+ public static List<string> GetUserMediaPaths(List<string> media_paths, bool interactive, string media_type)
+ {
+ // Check media type
+ var media_type_lower = media_type.ToLowerInvariant();
+ if (media_type_lower != "audio" && media_type_lower != "image")
+ {
+ throw new Exception("Media type must be 'image' or 'audio'");
+ }
+ var media_type_capitalized = char.ToUpperInvariant(media_type_lower[0]) + media_type_lower[1..];
+
+ var paths = new List<string>();
+ if (media_paths.Count > 0)
+ {
+ // If user-provided media paths
+ paths = media_paths;
+ }
+ else if (interactive)
+ {
+ // If interactive mode is on
+ Console.Write($"{media_type_capitalized} Path (comma separated; leave empty if no {media_type_lower}): ");
+ var line = Console.ReadLine() ?? string.Empty;
+
+ // Split by comma, trim whitespace and surrounding quotes
+ paths = line.Split(',', StringSplitOptions.RemoveEmptyEntries)
+ .Select(p =>
+ {
+ // Trim quotes
+ var s = p.Trim();
+ if (s.Length >= 2 && ((s[0] == '"' && s[^1] == '"') || (s[0] == '\'' && s[^1] == '\'')))
+ {
+ s = s[1..^1]; // strip surrounding quotes
+ }
+ return s;
+ })
+ .Where(p => !string.IsNullOrWhiteSpace(p))
+ .ToList();
+ }
+
+ paths = paths.Where(p => !string.IsNullOrWhiteSpace(p)).Select(p => p.Trim()).ToList();
+ foreach (var path in paths)
+ {
+ if (!File.Exists(path))
+ {
+ throw new Exception($"{media_type_capitalized} file not found: {path}");
+ }
+ Console.WriteLine($"Using {media_type_lower}: {path}");
+ }
+
+ return paths;
+ }
+
+ /// <summary>
+ /// Get images for user
+ /// </summary>
+ /// <param name="image_paths">User-provided image paths</param>
+ /// <param name="interactive">Interactive mode: when no image paths were provided, ask for them on the console</param>
+ /// <returns>
+ /// (all images, number of images) as a tuple; (null, 0) when no image path is available
+ /// </returns>
+ public static (Images?, int) GetUserImages(List<string> image_paths, bool interactive)
+ {
+ var media_type = "image";
+ List<string> paths = GetUserMediaPaths(image_paths, interactive, media_type);
+ if (paths.Count == 0)
+ {
+ Console.WriteLine($"No {media_type} provided");
+ return (null, 0);
+ }
+
+ var images = Images.Load(paths.ToArray());
+ return (images, paths.Count);
+ }
+
+ /// <summary>
+ /// Get audios for user
+ /// </summary>
+ /// <param name="audio_paths">User-provided audio paths</param>
+ /// <param name="interactive">Interactive mode: when no audio paths were provided, ask for them on the console</param>
+ /// <returns>
+ /// (all audios, number of audios) as a tuple; (null, 0) when no audio path is available
+ /// </returns>
+ public static (Audios?, int) GetUserAudios(List<string> audio_paths, bool interactive)
+ {
+ var media_type = "audio";
+ List<string> paths = GetUserMediaPaths(audio_paths, interactive, media_type);
+ if (paths.Count == 0)
+ {
+ Console.WriteLine($"No {media_type} provided");
+ return (null, 0);
+ }
+
+ var audios = Audios.Load(paths.ToArray());
+ return (audios, paths.Count);
+ }
+
+ /// <summary>
+ /// Get content for 'user' role in chat template
+ /// </summary>
+ /// <param name="model_type">Model type inside ORT GenAI ("phi3v", "phi4mm", "qwen2_5_vl" and "fara" get tag prefixes; any other value gets JSON structured content)</param>
+ /// <param name="num_images">Number of images</param>
+ /// <param name="num_audios">Number of audios (only used for "phi4mm")</param>
+ /// <param name="prompt">User prompt</param>
+ /// <returns>
+ /// Combined content for 'user' role
+ /// </returns>
+ public static string GetUserContent(string model_type, int num_images, int num_audios, string prompt)
+ {
+ string content;
+ // Combine all image tags, audio tags, and text into one user content
+ if (model_type == "phi3v")
+ {
+ // Phi-3 vision, Phi-3.5 vision
+ var image_tags = "";
+ for (int i = 0; i < num_images; i++)
+ {
+ image_tags += $"<|image_{i + 1}|>\n";
+ }
+ content = image_tags + prompt;
+ }
+ else if (model_type == "phi4mm")
+ {
+ // Phi-4 multimodal
+ var image_tags = "";
+ for (int i = 0; i < num_images; i++)
+ {
+ image_tags += $"<|image_{i + 1}|>\n";
+ }
+ var audio_tags = "";
+ for (int i = 0; i < num_audios; i++)
+ {
+ audio_tags += $"<|audio_{i + 1}|>\n";
+ }
+ content = image_tags + audio_tags + prompt;
+ }
+ else if (model_type == "qwen2_5_vl" || model_type == "fara")
+ {
+ // Qwen-2.5 VL, Fara
+ var image_tags = "";
+ for (int i = 0; i < num_images; i++)
+ {
+ image_tags += "<|vision_start|><|image_pad|><|vision_end|>";
+ }
+ content = image_tags + prompt;
+ }
+ else
+ {
+ // Gemma-3 style: structured content
+ var list = new List<Dictionary<string, string>>();
+ for (int i = 0; i < num_images; i++)
+ {
+ list.Add(new Dictionary<string, string>
+ {
+ ["type"] = "image"
+ });
+ }
+ list.Add(new Dictionary<string, string>
+ {
+ ["type"] = "text",
+ ["text"] = prompt
+ });
+ content = JsonSerializer.Serialize(list);
+ }
+
+ return content;
+ }
+
+ /// <summary>
+ /// Convert a list of tools to a list of tool schemas
+ /// </summary>
+ /// <param name="tools">List of OpenAI-compatible tools</param>
+ /// <returns>
+ /// List of JSON schema compatible tools (one schema per tool, in input order)
+ /// </returns>
+ public static IList ToolsToSchemas(IList tools)
+ {
+ var tool_schemas = new List { };
+ foreach (var tool in tools)
+ {
+ var name = new Dictionary()
+ {
+ { "const", tool.Function.Name } // the schema pins "name" to exactly this tool's name
+ };
+ var properties = new Dictionary
+ {
+ { "name", name }
+ };
+
+ var tool_parameters_exist = tool.Function.Parameters.Count != 0;
+ if (tool_parameters_exist)
+ {
+ var parameters = new Dictionary
+ {
+ { "type", tool.Function.Parameters.GetValueOrDefault("type", "object") },
+ { "properties", tool.Function.Parameters.GetValueOrDefault("properties", new Dictionary{}) },
+ { "required", tool.Function.Parameters.GetValueOrDefault("required", new List{}) }
+ };
+ properties.Add("parameters", parameters);
+ }
+
+ var tool_schema = new ToolSchema()
+ {
+ Description = tool.Function.Description,
+ Type = "object",
+ Properties = properties,
+ Required = tool_parameters_exist ? ["name", "parameters"] : ["name"], // "parameters" is required only when the tool declares some
+ AdditionalProperties = false
+ };
+ tool_schemas.Add(tool_schema);
+ }
+ return tool_schemas;
+ }
+
+ /// <summary>
+ /// Create a JSON schema from a list of tools
+ /// </summary>
+ /// <param name="tools">List of OpenAI-compatible tools</param>
+ /// <param name="tool_output">Output can have a tool call (sets the schema's MinItems to 1 instead of 0)</param>
+ /// <returns>
+ /// JSON schema as a JSON-compatible string
+ /// </returns>
+ public static string GetJsonSchema(IList<Tool> tools, bool tool_output)
+ {
+ var schemas = ToolsToSchemas(tools);
+ var x_guidance = new Dictionary<string, object>
+ {
+ { "whitespace_flexible", false },
+ { "key_separator", ": "},
+ { "item_separator", ", " }
+ };
+ var json_schema = new JsonSchema
+ {
+ XGuidance = x_guidance,
+ Type = "array",
+ Items = new Dictionary<string, IList<ToolSchema>>{
+ { "anyOf", schemas }
+ },
+ MinItems = tool_output ? 1 : 0
+ };
+
+ // Create serializer context with encoder to not escape non-ASCII characters (e.g. don't convert '&' to \u0026)
+ // and to skip null attributes. NOTE(review): WhenWritingDefault also omits default values (0 / false) of non-nullable value-type properties; confirm that is intended
+ var options = new JsonSerializerOptions()
+ {
+ WriteIndented = true,
+ PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+ Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
+ DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
+ };
+ var ctx = new ToolSerializerContext(options);
+
+ return JsonSerializer.Serialize(json_schema, ctx.JsonSchema);
+ }
+
+ /// <summary>
+ /// Create a LARK grammar from a list of tools
+ /// </summary>
+ /// <param name="tools">List of OpenAI-compatible tools</param>
+ /// <param name="text_output">Output can have text</param>
+ /// <param name="tool_output">Output can have a tool call (at least one of text_output and tool_output must be true, otherwise an exception is thrown)</param>
+ /// <param name="tool_call_start">String representation of tool call starting token</param>
+ /// <param name="tool_call_end">String representation of tool call ending token (the "toolcall" rule is only emitted when both start and end tokens are non-empty)</param>
+ /// <returns>
+ /// LARK grammar as a string
+ /// </returns>
+ public static string GetLarkGrammar(IList<Tool> tools, bool text_output, bool tool_output, string tool_call_start, string tool_call_end)
+ {
+ var known_tool_call_ids = !string.IsNullOrEmpty(tool_call_start) && !string.IsNullOrEmpty(tool_call_end);
+ var call_type = known_tool_call_ids ? "toolcall" : "functioncall";
+
+ var rows = new List<string>();
+ string? start_row;
+ if (text_output && !tool_output)
+ {
+ start_row = "start: TEXT";
+ }
+ else if (!text_output && tool_output)
+ {
+ start_row = $"start: {call_type}";
+ }
+ else if (text_output && tool_output)
+ {
+ start_row = $"start: TEXT | {call_type}";
+ }
+ else
+ {
+ throw new Exception("At least one of 'text_output' and 'tool_output' must be true");
+ }
+ rows.Add(start_row);
+
+ if (text_output)
+ {
+ var text_row = "TEXT: /[^{<](.|\\n)*/";
+ rows.Add(text_row);
+ }
+
+ if (tool_output)
+ {
+ var schema = GetJsonSchema(tools: tools, tool_output: tool_output);
+ if (known_tool_call_ids)
+ {
+ var tool_row = $"toolcall: {tool_call_start} functioncall {tool_call_end}";
+ rows.Add(tool_row);
+ }
+
+ var func_row = $"functioncall: %json {schema}";
+ rows.Add(func_row);
+ }
+
+ var grammar = string.Join("\n", rows);
+ return grammar;
+ }
+
+ /// <summary>
+ /// Convert a JSON-deserialized object of tools to a list of Tool objects
+ /// </summary>
+ /// <param name="tool_defs">JSON-deserialized object containing OpenAI-compatible tool definitions</param>
+ /// <returns>
+ /// List of Tool objects (definitions without a "function" entry, or whose function has no "parameters" entry, are skipped)
+ /// </returns>
+ public static IList<Tool> ToTool(IList<Dictionary<string, object>> tool_defs)
+ {
+ var tools = new List<Tool> { };
+ foreach (var tool_def in tool_defs)
+ {
+ if (tool_def.TryGetValue("function", out var functionObj))
+ {
+ var functionStr = JsonSerializer.Serialize(functionObj);
+ var functionDict = JsonSerializer.Deserialize(functionStr, ToolSerializerContext.Default.DictionaryStringObject);
+ if (functionDict == null) continue;
+
+ var name = functionDict.TryGetValue("name", out var nameObj) ? nameObj?.ToString() ?? string.Empty : string.Empty;
+ var description = functionDict.TryGetValue("description", out var descObj) ? descObj?.ToString() ?? string.Empty : string.Empty;
+
+ if (functionDict.TryGetValue("parameters", out var paramObj))
+ {
+ var paramStr = JsonSerializer.Serialize(paramObj);
+ var paramDict = JsonSerializer.Deserialize(paramStr, ToolSerializerContext.Default.DictionaryStringObject);
+ if (paramDict == null) continue;
+
+ var func = new FunctionDefinition
+ {
+ Name = name,
+ Description = description,
+ Parameters = paramDict
+ };
+ var tool = new Tool()
+ {
+ Type = "function",
+ Function = func
+ };
+ tools.Add(tool);
+ }
+ }
+ }
+ return tools;
+ }
+
+ ///
+ /// Create a grammar to use with LLGuidance
+ ///
+ /// Type of format requested
+ /// Path to file containing OpenAI-compatible tool definitions
+ /// JSON-serialized string containing OpenAI-compatible tool definitions
+ /// List of OpenAI-compatible tools defined in memory
+ /// Output can have text
+ /// Output can have a tool call
+ /// String representation of tool call starting token (e.g. )
+ /// String representation of tool call ending token (e.g. )
+ ///
+ /// (grammar type, grammar data, tools) as a tuple of strings
+ ///
+ public static (string, string, string) GetGuidance(
+ string response_format = "",
+ string filepath = "",
+ string tools_str = "",
+ List