diff --git a/src/config.cpp b/src/config.cpp index 225a080bab..4e346d1ac7 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -589,38 +589,6 @@ struct Embedding_Element : JSON::Element { EmbeddingOutputs_Element outputs_{v_.outputs}; }; -struct PromptTemplates_Element : JSON::Element { - explicit PromptTemplates_Element(std::optional& v) : v_{v} {} - - void OnValue(std::string_view name, JSON::Value value) override { - // if one of templates is given in json, then any non-specified template will be default "{Content}" - if (name == "assistant") { - EnsureAvailable(); - v_->assistant = JSON::Get(value); - } else if (name == "prompt") { - EnsureAvailable(); - v_->prompt = JSON::Get(value); - } else if (name == "system") { - EnsureAvailable(); - v_->system = JSON::Get(value); - } else if (name == "user") { - EnsureAvailable(); - v_->user = JSON::Get(value); - } else { - throw JSON::unknown_value_error{}; - } - } - - private: - std::optional& v_; - - void EnsureAvailable() { - if (!v_.has_value()) { - v_.emplace(); - } - } -}; - struct Model_Element : JSON::Element { explicit Model_Element(Config::Model& v) : v_{v} {} @@ -664,9 +632,6 @@ struct Model_Element : JSON::Element { if (name == "embedding") { return embedding_; } - if (name == "prompt_templates") { - return prompt_templates_; - } if (name == "speech") { return speech_; } @@ -680,7 +645,6 @@ struct Model_Element : JSON::Element { Eos_Array_Element eos_token_ids_{v_}; Vision_Element vision_{v_.vision}; Embedding_Element embedding_{v_.embedding}; - PromptTemplates_Element prompt_templates_{v_.prompt_templates}; Speech_Element speech_{v_.speech}; }; diff --git a/src/config.h b/src/config.h index baf4d7013e..ea18bd11c8 100644 --- a/src/config.h +++ b/src/config.h @@ -25,7 +25,6 @@ struct Config { static constexpr std::string_view InputsEmbedsName = "inputs_embeds"; static constexpr std::string_view CurrentSequenceLengthName = "current_sequence_length"; static constexpr std::string_view PastSequenceLengthName = "past_sequence_length"; - static constexpr std::string_view promptTemplate = "{Content}"; static constexpr std::string_view TotalSequenceLengthName = "total_sequence_length"; static constexpr std::string_view TokenTypeIdsName = "token_type_ids"; @@ -206,13 +205,6 @@ struct Config { } decoder; - struct PromptTemplates { - std::string assistant{Defaults::promptTemplate}; - std::string prompt{Defaults::promptTemplate}; - std::string system{Defaults::promptTemplate}; - std::string user{Defaults::promptTemplate}; - }; - std::optional prompt_templates; } model; struct Search { diff --git a/src/python/py/models/builder.py b/src/python/py/models/builder.py index 5eb0de4f21..fa57615500 100644 --- a/src/python/py/models/builder.py +++ b/src/python/py/models/builder.py @@ -400,11 +400,6 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir): ep_options = { self.ep : self.ep_attrs[self.ep] } genai_config["model"]["decoder"]["session_options"]["provider_options"].append(ep_options) - if self.extra_options.get("include_prompt_templates", False): - prompt_templates = self._get_prompt_templates(model_name_or_path, extra_kwargs) - if prompt_templates is not None: - genai_config["model"]["prompt_templates"] = prompt_templates - print(f"Saving GenAI config in {out_dir}") with open(os.path.join(out_dir,"genai_config.json"), "w") as f: json.dump(genai_config, f, indent=4) @@ -413,30 +408,6 @@ def save_processing(self, model_name_or_path, extra_kwargs, out_dir): tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token=self.hf_token, trust_remote_code=True, **extra_kwargs) print(f"Saving processing files in {out_dir} for GenAI") tokenizer.save_pretrained(out_dir) - - def _get_prompt_templates(self, hf_name, extra_kwargs): - try: - # disable end of sentence padding with eos_token=None - tokenizer = AutoTokenizer.from_pretrained(hf_name, token=self.hf_token, trust_remote_code=True, eos_token=None, **extra_kwargs) - system_template = tokenizer.apply_chat_template([{'role': 'system', 'content': '{Content}'}], tokenize=False) - system_user_template = tokenizer.apply_chat_template([{'role': 'system', 'content': '{Content}'}, {'role': 'user', 'content': '{Content}'}], tokenize=False) - system_user_assistant_template = tokenizer.apply_chat_template([{'role': 'system', 'content': '{Content}'}, {'role': 'user', 'content': '{Content}'}, {'role': 'assistant', 'content': '{Content}'}], tokenize=False) - assert system_user_template.startswith(system_template), "Chat templates may contain padding tokens, leading to incorrect prompt templates" - assert system_user_assistant_template.startswith(system_user_template), "Chat templates may contain padding tokens, leading to incorrect prompt templates" - user_template = system_user_template[len(system_template):] - assistant_template = system_user_assistant_template[len(system_user_template):] - prompt_template = system_user_assistant_template[len(system_template):] - prompt_template = prompt_template[:prompt_template.rfind('{Content}')] - templates = { - "system": system_template, - "user": user_template, - "assistant": assistant_template, - "prompt": prompt_template - } - return templates - except Exception as e: - print(f"Failed to get prompt templates. Error: {e}") - return None def save_model(self, out_dir): print(f"Saving ONNX model in {out_dir}") @@ -3284,7 +3255,7 @@ def check_extra_options(kv_pairs): """ Check key-value pairs and set values correctly """ - bools = ["int4_is_symmetric", "exclude_embeds", "exclude_lm_head", "include_hidden_states", "enable_cuda_graph", "use_8bits_moe", "use_qdq", "include_prompt_templates"] + bools = ["int4_is_symmetric", "exclude_embeds", "exclude_lm_head", "include_hidden_states", "enable_cuda_graph", "use_8bits_moe", "use_qdq"] for key in bools: if key in kv_pairs: if kv_pairs[key] in {"false", "False", "0"}: @@ -3550,8 +3521,6 @@ def get_args(): Use this option to enable GPUs that do not support FP16 on WebGPU (e.g. GTX 10xx). adapter_path = Path to folder on disk containing the adapter files (adapter_config.json and adapter model weights). Use this option for LoRA models. - include_prompt_templates = Include prompt templates in the GenAI config file. Default is false. - Use this option to include per-role prompt templates in the `genai_config.json` file. """), )