From 57063b4df6c3185f4547b66f16ca6ec65d9e6648 Mon Sep 17 00:00:00 2001
From: HuiyingLi <willwin.lee@gmail.com>
Date: Wed, 7 Feb 2024 00:20:00 -0800
Subject: [PATCH 1/7] add/rename from nvgpt to nv_steerlm, add nv_dpo template

Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
---
 .../multimodal/data/neva/conversation.py      |   2 +
 .../multimodal/data/neva/neva_dataset.py      | 108 +++++++++++++++++-
 .../common/text_generation_strategy.py        |   8 +-
 3 files changed, 115 insertions(+), 3 deletions(-)
diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py
index 886049dd5170..744329b47ed4 100644
--- a/nemo/collections/multimodal/data/neva/conversation.py
+++ b/nemo/collections/multimodal/data/neva/conversation.py
@@ -400,6 +400,8 @@ def dict(self):
     "v1_mmtag": conv_llava_v1_mmtag,
     "llava_llama_2": conv_llava_llama_2,
     "nvgpt": conv_nvgpt,
+    "nv_dpo": conv_nvgpt,
+    "nv_steerlm": conv_nvgpt
 }
 
 
diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py
index 90f862869369..5434392ce482 100644
--- a/nemo/collections/multimodal/data/neva/neva_dataset.py
+++ b/nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -462,6 +462,110 @@ def preprocess_nvgpt(sources: dict, tokenizer, cfg,) -> Dict:
     return dict(tokens=tokens, labels=labels,)
 
 
+def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
+    """
+    Preprocess a given set of conversational sources using the nv_dpo conversation template
+
+    This function processes conversations by first ensuring the conversation starts with a 'human' role, then tokenizes the conversations, applies specific token replacements, and finally masks labels for training purposes.
+
+    Parameters:
+    - sources: A dictionary containing conversational data. Expected format is a dict of conversations, where each conversation is a list of messages, and each message is a dict with 'from' (role) and 'value' (message text).
+    - tokenizer: A tokenizer from the Hugging Face Transformers library used for tokenizing the conversations.
+    - cfg: Configuration settings which include 'add_extra_token' (bool) to determine if an extra token should be added to the tokenized output, and 'context_length' for specifying the tokenization context length.
+
+    Returns:
+    - Dict: A dictionary containing two keys:
+        - 'tokens': A tensor of tokenized conversation data.
+        - 'labels': A tensor of labels for the conversation data, used for training models. Labels are masked based on the conversation structure.
+
+    Note:
+    - The function includes specific token replacements (e.g., DEFAULT_IMAGE_PATCH_TOKEN, <s>, </s>) and masking techniques for labels.
+    - It is designed to work with conversational data where messages alternate between a 'human' and a 'gpt' role.
+    - The function does not validate roles: turns at even positions are relabeled with the template's first role and turns at odd positions with its second role, regardless of their original 'from' value.
+    """
+
+    """<extra_id_0>System\n\n<extra_id_1>User\n{user input}\n<extra_id_1>Assistant\n"""
+
+
+    conv = conversation_lib.conv_nvgpt.copy()
+
+    # Apply prompt templates
+    conversations = []
+    for source in sources:
+        conv.messages = []
+        conv.system = source.get('system', conv.system)
+
+        strip_end_for_inference = False
+        for i, turn in enumerate(source['conversations']):
+
+            if i % 2 == 1:
+                turn['from'] = conv.roles[1]
+                conv.append_message(turn['from'], turn['value'])
+                if not turn["value"]:
+                    strip_end_for_inference = (
+                        True  # in inference, current turn is empty, thus end tokens need to be stripped.
+                    )
+            else:
+                turn['from'] = conv.roles[0]
+                conv.append_message(turn['from'], turn['value'])
+        context = conv.get_prompt()
+        if strip_end_for_inference:
+            context = context.rstrip("\n<extra_id_1>") + "\n"
+        conversations.append(context)
+
+    add_extra_token = cfg.get("add_extra_token")
+    # Tokenize conversations
+    tokens = tokenize(
+        texts=conversations,
+        tokenizer=tokenizer,
+        context_length=cfg.get("context_length"),
+        add_extra_token=add_extra_token,
+    )
+
+    labels = tokens.clone().detach()
+
+    # Mask targets
+    sep = conv.sep + conv.roles[1] + "\n"
+    for conversation, target in zip(conversations, labels):
+        rounds = conversation.split(conv.sep)
+        re_rounds = [conv.sep.join(rounds[:3])]  # system + user + gpt
+
+        for conv_idx in range(3, len(rounds), 2):
+            re_rounds.append(conv.sep.join(rounds[conv_idx : conv_idx + 2]))  # user + gpt
+
+        cur_len = 0
+        for i, rou in enumerate(re_rounds):
+            if rou == "":
+                break
+            parts = rou.split(sep)
+            if len(parts) != 2:
+                break
+
+            instruction_len = len(tokenizer.text_to_ids(parts[0] + sep))
+            round_len = len(tokenizer.text_to_ids(rou + conv.sep))
+            import pdb; pdb.set_trace()
+            target[cur_len : cur_len + instruction_len] = IGNORE_INDEX
+
+            cur_len += round_len
+        target[cur_len:] = IGNORE_INDEX
+
+
+    # Check if masking working correctly
+    #print(tokenizer.ids_to_text([a[0] for a in filter(lambda x:x[1]!=-1 ,[x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())])]))
+    #print([x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())])
+
+    if add_extra_token:
+        tokens = tokens[:, :-1].contiguous()
+        labels = labels[:, 1:].contiguous()
+    else:
+        labels = torch.roll(labels, shifts=-1, dims=-1)
+        labels[:, -1] = IGNORE_INDEX
+
+
+
+    return dict(tokens=tokens, labels=labels,)
+
+
 def preprocess_plain(sources, tokenizer, cfg,) -> Dict:
     """
     Preprocesses plain text sources (no template) for tokenization and label generation.
@@ -604,8 +708,10 @@ def expand2square(pil_img, background_color):
             images_tensors = torch.tensor([])
             sources = copy.deepcopy(sources)
 
-        if self.conv_template == "nvgpt":
+        if self.conv_template in ["nvgpt", "nv_steerlm"]:
             data_dict = preprocess_nvgpt(sources, self.tokenizer, self.multimodal_cfg,)
+        elif self.conv_template == "nv_dpo":
+            data_dict = preprocess_nv_dpo(sources, self.tokenizer, self.multimodal_cfg,)
         elif self.conv_template == "v1":
             data_dict = preprocess_v1(sources, self.tokenizer, self.multimodal_cfg,)
         elif self.conv_template == "llama_2":
diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py
index fd68eef592fd..f7e72e629d74 100644
--- a/nemo/collections/nlp/modules/common/text_generation_strategy.py
+++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py
@@ -330,11 +330,12 @@ def neva_process_prompts(prompt, tokenizer, multimodal_cfg, num_media_latents, c
         preprocess_llama_2,
         preprocess_multimodal,
         preprocess_nvgpt,
+        preprocess_nv_dpo,
         preprocess_v1,
     )
 
     list_data_dict = []
-    if multimodal_cfg["conv_template"] == "nvgpt":
+    if multimodal_cfg["conv_template"] in ["nvgpt", "nv_steerlm", "nv_dpo"]:
         record = {
             'system': 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n',
             'conversations': [{'from': 'User', 'value': prompt}, {'from': 'Assistant', 'value': '',},],
@@ -348,7 +349,10 @@ def neva_process_prompts(prompt, tokenizer, multimodal_cfg, num_media_latents, c
         sources = preprocess_multimodal(
             copy.deepcopy(list_data_dict), multimodal_cfg, num_media_latents
         )  # HARDCODED FOR NOW
-        data_dict = preprocess_nvgpt(sources, tokenizer, multimodal_cfg)
+        if multimodal_cfg["conv_template"] in ["nvgpt", "nv_steerlm"]:
+            data_dict = preprocess_nvgpt(sources, tokenizer, multimodal_cfg)
+        else:
+            data_dict = preprocess_nv_dpo(sources, tokenizer, multimodal_cfg)
 
     elif multimodal_cfg["conv_template"] == "llama_2":
         record = {

From d865931500c5e344134288a447866bac74f061db Mon Sep 17 00:00:00 2001
From: HuiyingLi <willwin.lee@gmail.com>
Date: Wed, 7 Feb 2024 17:35:30 -0800
Subject: [PATCH 2/7] add nv_dpo conversation to accommodate empty system
 message

Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
---
 .../multimodal/data/neva/conversation.py          | 15 +++++++++++++--
 .../multimodal/data/neva/neva_dataset.py          |  3 +--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/nemo/collections/multimodal/data/neva/conversation.py b/nemo/collections/multimodal/data/neva/conversation.py
index 744329b47ed4..d51a5f973f99 100644
--- a/nemo/collections/multimodal/data/neva/conversation.py
+++ b/nemo/collections/multimodal/data/neva/conversation.py
@@ -263,6 +263,17 @@ def dict(self):
     sep2=f"{DEFAULT_SYSTEM_TOKEN}System\n",
 )
 
+conv_nv_dpo = Conversation(
+    system="\n",
+    roles=("User", "Assistant"),
+    version="nv_dpo",
+    messages=(),
+    offset=0,
+    sep_style=SeparatorStyle.NVGPT,
+    sep=DEFAULT_SEPARATOR_TOKEN,
+    sep2=f"{DEFAULT_SYSTEM_TOKEN}System\n",
+)
+
 conv_vicuna_v0 = Conversation(
     system="A chat between a curious human and an artificial intelligence assistant. "
     "The assistant gives helpful, detailed, and polite answers to the human's questions.",
@@ -400,8 +411,8 @@ def dict(self):
     "v1_mmtag": conv_llava_v1_mmtag,
     "llava_llama_2": conv_llava_llama_2,
     "nvgpt": conv_nvgpt,
-    "nv_dpo": conv_nvgpt,
-    "nv_steerlm": conv_nvgpt
+    "nv_steerlm": conv_nvgpt,
+    "nv_dpo": conv_nv_dpo,
 }
 
 
diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py
index 5434392ce482..ea778cd59145 100644
--- a/nemo/collections/multimodal/data/neva/neva_dataset.py
+++ b/nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -487,7 +487,7 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
     """<extra_id_0>System\n\n<extra_id_1>User\n{user input}\n<extra_id_1>Assistant\n"""
 
 
-    conv = conversation_lib.conv_nvgpt.copy()
+    conv = conversation_lib.conv_nv_dpo.copy()
 
     # Apply prompt templates
     conversations = []
@@ -543,7 +543,6 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
 
             instruction_len = len(tokenizer.text_to_ids(parts[0] + sep))
             round_len = len(tokenizer.text_to_ids(rou + conv.sep))
-            import pdb; pdb.set_trace()
             target[cur_len : cur_len + instruction_len] = IGNORE_INDEX
 
             cur_len += round_len

From a062856f9a4dc53292a22ede334ad559c4c2fb62 Mon Sep 17 00:00:00 2001
From: HuiyingLi <willwin.lee@gmail.com>
Date: Thu, 8 Feb 2024 16:41:14 -0800
Subject: [PATCH 3/7] handle nv_dpo template text generation

Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
---
 .../nlp/modules/common/text_generation_strategy.py            | 2 +-
 nemo/collections/nlp/modules/common/text_generation_utils.py  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py
index f7e72e629d74..38388b1c9c39 100644
--- a/nemo/collections/nlp/modules/common/text_generation_strategy.py
+++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py
@@ -337,7 +337,7 @@ def neva_process_prompts(prompt, tokenizer, multimodal_cfg, num_media_latents, c
     list_data_dict = []
     if multimodal_cfg["conv_template"] in ["nvgpt", "nv_steerlm", "nv_dpo"]:
         record = {
-            'system': 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n',
+            'system': '\n' if multimodal_cfg["conv_template"]=='nv_dpo' else 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n',
             'conversations': [{'from': 'User', 'value': prompt}, {'from': 'Assistant', 'value': '',},],
         }
 
diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py
index c6a8f1e46900..da8cc4e8faae 100644
--- a/nemo/collections/nlp/modules/common/text_generation_utils.py
+++ b/nemo/collections/nlp/modules/common/text_generation_utils.py
@@ -181,7 +181,7 @@ def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_para
 
         clean_response = clean_text
 
-        if conv_template == "nvgpt":
+        if conv_template in ["nvgpt", "nv_steerlm"]:
             labels_str_regexp = re.compile(f"<extra_id_2>quality:.*\n")
             last_match_end_position = None
             for match in re.finditer(labels_str_regexp, clean_response):
@@ -189,6 +189,8 @@ def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_para
             if last_match_end_position is not None:
                 clean_response = clean_response[last_match_end_position:]
             clean_response = clean_response.strip("<extra_id_1>")
+        elif conv_template == 'nv_dpo':
+            clean_response = clean_response.split("<extra_id_1>")[-2].strip().split("\n")[-1]
         elif conv_template == "llama_2":
             clean_response = clean_response.rsplit("[/INST] ", 1)[-1]
         elif conv_template == "v1":

From 360b6125f292065ade300529e95f87c3d7a167ef Mon Sep 17 00:00:00 2001
From: HuiyingLi <willwin.lee@gmail.com>
Date: Thu, 15 Feb 2024 23:21:41 -0800
Subject: [PATCH 4/7] add prompt string to nvgpt

Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
---
 nemo/collections/multimodal/data/neva/neva_dataset.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py
index ea778cd59145..38617460a5ad 100644
--- a/nemo/collections/multimodal/data/neva/neva_dataset.py
+++ b/nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -381,6 +381,8 @@ def preprocess_nvgpt(sources: dict, tokenizer, cfg,) -> Dict:
     - The function asserts that each message in a conversation alternates between the defined roles and skips messages not starting with the 'human' role.
     """
 
+    """<extra_id_0>System\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n<extra_id_1>User\n{user input}\n<extra_id_1>Assistant\n<extra_id_2>quality:4,toxicity:0,humor:0,creativity:0,helpfulness:4,correctness:4,coherence:4,complexity:4,verbosity:4\n"""
+
     conv = conversation_lib.conv_nvgpt.copy()
 
     # Apply prompt templates

From b4d7d248d896d7d5801f8ae885eec758bc62bbf1 Mon Sep 17 00:00:00 2001
From: HuiyingLi <willwin.lee@gmail.com>
Date: Thu, 15 Feb 2024 23:23:17 -0800
Subject: [PATCH 5/7] bugfix for inference prompt template

Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
---
 nemo/collections/multimodal/data/neva/neva_dataset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py
index 38617460a5ad..39f6b09d5524 100644
--- a/nemo/collections/multimodal/data/neva/neva_dataset.py
+++ b/nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -512,7 +512,8 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
                 conv.append_message(turn['from'], turn['value'])
         context = conv.get_prompt()
         if strip_end_for_inference:
-            context = context.rstrip("\n<extra_id_1>") + "\n"
+            if context.endswith("\n<extra_id_1>"):
+                context = context[:-len("\n<extra_id_1>")] + "\n"
         conversations.append(context)
 
     add_extra_token = cfg.get("add_extra_token")

From d6212410bcc2719afeb99a9826b1bc263cfff28d Mon Sep 17 00:00:00 2001
From: Huiying Li <willwin.lee@gmail.com>
Date: Tue, 20 Feb 2024 09:56:38 -0800
Subject: [PATCH 6/7] bug fix for grabbing clean text

Signed-off-by: Huiying Li <willwin.lee@gmail.com>
---
 nemo/collections/nlp/modules/common/text_generation_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py
index da8cc4e8faae..1e7c3f763831 100644
--- a/nemo/collections/nlp/modules/common/text_generation_utils.py
+++ b/nemo/collections/nlp/modules/common/text_generation_utils.py
@@ -190,7 +190,7 @@ def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_para
                 clean_response = clean_response[last_match_end_position:]
             clean_response = clean_response.strip("<extra_id_1>")
         elif conv_template == 'nv_dpo':
-            clean_response = clean_response.split("<extra_id_1>")[-2].strip().split("\n")[-1]
+            clean_response = clean_response.split("<extra_id_1>")[-2][10:] #[10:] for removing "Assistant\n"
         elif conv_template == "llama_2":
             clean_response = clean_response.rsplit("[/INST] ", 1)[-1]
         elif conv_template == "v1":

From 45ef3bddfa9fe3b99757844b905415839aad13d8 Mon Sep 17 00:00:00 2001
From: Huiying Li <willwin.lee@gmail.com>
Date: Wed, 21 Feb 2024 11:06:00 -0800
Subject: [PATCH 7/7] fix code format

Signed-off-by: Huiying Li <willwin.lee@gmail.com>
---
 nemo/collections/multimodal/data/neva/neva_dataset.py    | 9 ++-------
 .../nlp/modules/common/text_generation_strategy.py       | 6 ++++--
 .../nlp/modules/common/text_generation_utils.py          | 2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py
index 39f6b09d5524..15d755a7d59a 100644
--- a/nemo/collections/multimodal/data/neva/neva_dataset.py
+++ b/nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -488,7 +488,6 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
 
     """<extra_id_0>System\n\n<extra_id_1>User\n{user input}\n<extra_id_1>Assistant\n"""
 
-
     conv = conversation_lib.conv_nv_dpo.copy()
 
     # Apply prompt templates
@@ -513,7 +512,7 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
         context = conv.get_prompt()
         if strip_end_for_inference:
             if context.endswith("\n<extra_id_1>"):
-                context = context[:-len("\n<extra_id_1>")] + "\n"
+                context = context[: -len("\n<extra_id_1>")] + "\n"
         conversations.append(context)
 
     add_extra_token = cfg.get("add_extra_token")
@@ -551,10 +550,8 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
             cur_len += round_len
         target[cur_len:] = IGNORE_INDEX
 
-
     # Check if masking working correctly
-    #print(tokenizer.ids_to_text([a[0] for a in filter(lambda x:x[1]!=-1 ,[x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())])]))
-    #print([x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())])
+    # print([x for x in zip(tokens[0].numpy().tolist(), labels[0].numpy().tolist())])
 
     if add_extra_token:
         tokens = tokens[:, :-1].contiguous()
@@ -563,8 +560,6 @@ def preprocess_nv_dpo(sources: dict, tokenizer, cfg,) -> Dict:
         labels = torch.roll(labels, shifts=-1, dims=-1)
         labels[:, -1] = IGNORE_INDEX
 
-
-
     return dict(tokens=tokens, labels=labels,)
 
 
diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py
index 38388b1c9c39..59452ce96f99 100644
--- a/nemo/collections/nlp/modules/common/text_generation_strategy.py
+++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py
@@ -329,15 +329,17 @@ def neva_process_prompts(prompt, tokenizer, multimodal_cfg, num_media_latents, c
         DEFAULT_IMAGE_TOKEN,
         preprocess_llama_2,
         preprocess_multimodal,
-        preprocess_nvgpt,
         preprocess_nv_dpo,
+        preprocess_nvgpt,
         preprocess_v1,
     )
 
     list_data_dict = []
     if multimodal_cfg["conv_template"] in ["nvgpt", "nv_steerlm", "nv_dpo"]:
         record = {
-            'system': '\n' if multimodal_cfg["conv_template"]=='nv_dpo' else 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n',
+            'system': '\n'
+            if multimodal_cfg["conv_template"] == 'nv_dpo'
+            else 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\'s questions.\n\n',
             'conversations': [{'from': 'User', 'value': prompt}, {'from': 'Assistant', 'value': '',},],
         }
 
diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py
index 1e7c3f763831..7946b846c7cd 100644
--- a/nemo/collections/nlp/modules/common/text_generation_utils.py
+++ b/nemo/collections/nlp/modules/common/text_generation_utils.py
@@ -190,7 +190,7 @@ def megatron_neva_generate(model, prompt_dict_list, length_params, sampling_para
                 clean_response = clean_response[last_match_end_position:]
             clean_response = clean_response.strip("<extra_id_1>")
         elif conv_template == 'nv_dpo':
-            clean_response = clean_response.split("<extra_id_1>")[-2][10:] #[10:] for removing "Assistant\n"
+            clean_response = clean_response.split("<extra_id_1>")[-2][10:]  # [10:] for removing "Assistant\n"
         elif conv_template == "llama_2":
             clean_response = clean_response.rsplit("[/INST] ", 1)[-1]
         elif conv_template == "v1":