From 0e2f97c754ce8ca60d0c6ef366377b775f231c98 Mon Sep 17 00:00:00 2001
From: Sizhe Liu <liusz@smail.nju.edu.cn>
Date: Tue, 16 Jul 2024 20:14:27 +0800
Subject: [PATCH] fix the missing truncation=True in llm/predict/predictor.py

---
 llm/predict/predictor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llm/predict/predictor.py b/llm/predict/predictor.py
index 3ae5a22e8810..da5a3e62f553 100644
--- a/llm/predict/predictor.py
+++ b/llm/predict/predictor.py
@@ -974,6 +974,7 @@ def _preprocess(self, source):
                 text,
                 return_tensors="np",
                 padding=True,
+                truncation=True,
                 max_length=self.config.src_length,
                 # if use chat_template, it will not add special_tokens
                 add_special_tokens=self.tokenizer.chat_template is None
@@ -1224,7 +1225,7 @@ def predict(self, input_texts: str | list[str]):
     def _preprocess(self, source):
         BlockInferencePredictorMixin._preprocess(self, source)
         for i, text in enumerate(source):
-            tokens = self.tokenizer(text, return_tensors="np", padding=False, max_length=(self.config.src_length))
+            tokens = self.tokenizer(text, return_tensors="np", padding=False, truncation=True, max_length=(self.config.src_length))
             input_ids = tokens["input_ids"][0]
             length = len(input_ids)
             need_block_nums = (